diff --git "a/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E1e-1-ctx4k-part5.ipynb" "b/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E1e-1-ctx4k-part5.ipynb"
--- "a/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E1e-1-ctx4k-part5.ipynb"
+++ "b/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E1e-1-ctx4k-part5.ipynb"
@@ -3,13 +3,13 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "0aa470f4",
+   "id": "f1d7ccda",
    "metadata": {
     "papermill": {
-     "duration": 0.002959,
-     "end_time": "2023-09-01T14:53:06.162658",
+     "duration": 0.002567,
+     "end_time": "2023-09-02T06:16:28.958047",
      "exception": false,
-     "start_time": "2023-09-01T14:53:06.159699",
+     "start_time": "2023-09-02T06:16:28.955480",
      "status": "completed"
     },
     "tags": []
@@ -28,13 +28,13 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "30b4429d",
+   "id": "54efc4fa",
    "metadata": {
     "papermill": {
-     "duration": 0.001695,
-     "end_time": "2023-09-01T14:53:06.166400",
+     "duration": 0.001679,
+     "end_time": "2023-09-02T06:16:28.961787",
      "exception": false,
-     "start_time": "2023-09-01T14:53:06.164705",
+     "start_time": "2023-09-02T06:16:28.960108",
      "status": "completed"
     },
     "tags": []
@@ -46,19 +46,19 @@
   {
    "cell_type": "code",
    "execution_count": 1,
-   "id": "b9d8c411",
+   "id": "8b4caf1b",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-01T14:53:06.171899Z",
-     "iopub.status.busy": "2023-09-01T14:53:06.171621Z",
-     "iopub.status.idle": "2023-09-01T14:53:06.892025Z",
-     "shell.execute_reply": "2023-09-01T14:53:06.891053Z"
+     "iopub.execute_input": "2023-09-02T06:16:28.967200Z",
+     "iopub.status.busy": "2023-09-02T06:16:28.966328Z",
+     "iopub.status.idle": "2023-09-02T06:16:29.684585Z",
+     "shell.execute_reply": "2023-09-02T06:16:29.683732Z"
     },
     "papermill": {
-     "duration": 0.72586,
-     "end_time": "2023-09-01T14:53:06.894118",
+     "duration": 0.722814,
+     "end_time": "2023-09-02T06:16:29.686619",
      "exception": false,
-     "start_time": "2023-09-01T14:53:06.168258",
+     "start_time": "2023-09-02T06:16:28.963805",
      "status": "completed"
     },
     "tags": []
@@ -74,19 +74,19 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "id": "d6a7b76f",
+   "id": "6cc3a721",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-01T14:53:06.899776Z",
-     "iopub.status.busy": "2023-09-01T14:53:06.899528Z",
-     "iopub.status.idle": "2023-09-01T14:53:09.788640Z",
-     "shell.execute_reply": "2023-09-01T14:53:09.787798Z"
+     "iopub.execute_input": "2023-09-02T06:16:29.691742Z",
+     "iopub.status.busy": "2023-09-02T06:16:29.691539Z",
+     "iopub.status.idle": "2023-09-02T06:16:32.527158Z",
+     "shell.execute_reply": "2023-09-02T06:16:32.526387Z"
     },
     "papermill": {
-     "duration": 2.894122,
-     "end_time": "2023-09-01T14:53:09.790308",
+     "duration": 2.840419,
+     "end_time": "2023-09-02T06:16:32.529061",
      "exception": false,
-     "start_time": "2023-09-01T14:53:06.896186",
+     "start_time": "2023-09-02T06:16:29.688642",
      "status": "completed"
     },
     "tags": []
@@ -118,19 +118,19 @@
   {
    "cell_type": "code",
    "execution_count": 3,
-   "id": "7f700082",
+   "id": "6c3b0fbe",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-01T14:53:09.796934Z",
-     "iopub.status.busy": "2023-09-01T14:53:09.795855Z",
-     "iopub.status.idle": "2023-09-01T14:53:09.802854Z",
-     "shell.execute_reply": "2023-09-01T14:53:09.802131Z"
+     "iopub.execute_input": "2023-09-02T06:16:32.534904Z",
+     "iopub.status.busy": "2023-09-02T06:16:32.534704Z",
+     "iopub.status.idle": "2023-09-02T06:16:32.541251Z",
+     "shell.execute_reply": "2023-09-02T06:16:32.540683Z"
     },
     "papermill": {
-     "duration": 0.011279,
-     "end_time": "2023-09-01T14:53:09.804026",
+     "duration": 0.010568,
+     "end_time": "2023-09-02T06:16:32.542402",
      "exception": false,
-     "start_time": "2023-09-01T14:53:09.792747",
+     "start_time": "2023-09-02T06:16:32.531834",
      "status": "completed"
     },
     "tags": []
@@ -193,19 +193,19 @@
   {
    "cell_type": "code",
    "execution_count": 4,
-   "id": "06ddc114",
+   "id": "2bb9275c",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-01T14:53:09.809533Z",
-     "iopub.status.busy": "2023-09-01T14:53:09.808951Z",
-     "iopub.status.idle": "2023-09-01T14:53:10.832978Z",
-     "shell.execute_reply": "2023-09-01T14:53:10.832125Z"
+     "iopub.execute_input": "2023-09-02T06:16:32.547321Z",
+     "iopub.status.busy": "2023-09-02T06:16:32.547162Z",
+     "iopub.status.idle": "2023-09-02T06:17:41.766730Z",
+     "shell.execute_reply": "2023-09-02T06:17:41.765728Z"
     },
     "papermill": {
-     "duration": 1.028292,
-     "end_time": "2023-09-01T14:53:10.834448",
+     "duration": 69.223979,
+     "end_time": "2023-09-02T06:17:41.768492",
      "exception": false,
-     "start_time": "2023-09-01T14:53:09.806156",
+     "start_time": "2023-09-02T06:16:32.544513",
      "status": "completed"
     },
     "tags": []
@@ -215,22 +215,35 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "--2023-09-01 14:53:09--  https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E0_1-mem-ctx-4k.pth\r\n",
-      "Resolving huggingface.co (huggingface.co)... 18.172.134.4, 18.172.134.88, 18.172.134.124, ...\r\n",
-      "Connecting to huggingface.co (huggingface.co)|18.172.134.4|:443... "
+      "--2023-09-02 06:16:32--  https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E0_1-mem-ctx-4k.pth\r\n",
+      "Resolving huggingface.co (huggingface.co)... "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "connected.\r\n"
+      "18.165.122.101, 18.165.122.11, 18.165.122.120, ...\r\n",
+      "Connecting to huggingface.co (huggingface.co)|18.165.122.101|:443... connected.\r\n",
+      "HTTP request sent, awaiting response... "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "302 Found\r\n",
+      "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/20fb328bd798a1e23967f80661a0b6a277f1d45ed2d90760cec68d32dfcbd516?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L96-D1024-E0_1-mem-ctx-4k.pth%3B+filename%3D%22v5-L96-D1024-E0_1-mem-ctx-4k.pth%22%3B&Expires=1693894592&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5Mzg5NDU5Mn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzIwZmIzMjhiZDc5OGExZTIzOTY3ZjgwNjYxYTBiNmEyNzdmMWQ0NWVkMmQ5MDc2MGNlYzY4ZDMyZGZjYmQ1MTY%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=0lvghwVju7ugxwlMlvj%7EIeK4lhVAMO1bcdYQsP0FxGPt5AK88smuTfGJvprL1l1HiQVj4a7f6U8yvc97nQxIdWdTF0RC8s4SAyFcZoupxdMbzOQ7kEr7OgXQPIdUhXWFAQN8Jrd-gkTfbl1ZAR2HjikcsW0MZI2do5UoTezTx6aBHfDhSwsx0SOCKOXHdWmyvofJts71OggEykKwEqY0AH3CHBro5v88luav9avT6Ha3nrtoJQKwKBxYLkC5RVwbsw7L6Br%7EV14-DYVOr1EIrDRifwMfNjtbDrx5eoJiXJF4mpngng5zcaDIofe1LUprJ8oOglZbD8IwmUJyW6JiRw__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n",
+      "--2023-09-02 06:16:32--  https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/20fb328bd798a1e23967f80661a0b6a277f1d45ed2d90760cec68d32dfcbd516?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L96-D1024-E0_1-mem-ctx-4k.pth%3B+filename%3D%22v5-L96-D1024-E0_1-mem-ctx-4k.pth%22%3B&Expires=1693894592&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5Mzg5NDU5Mn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzIwZmIzMjhiZDc5OGExZTIzOTY3ZjgwNjYxYTBiNmEyNzdmMWQ0NWVkMmQ5MDc2MGNlYzY4ZDMyZGZjYmQ1MTY%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=0lvghwVju7ugxwlMlvj%7EIeK4lhVAMO1bcdYQsP0FxGPt5AK88smuTfGJvprL1l1HiQVj4a7f6U8yvc97nQxIdWdTF0RC8s4SAyFcZoupxdMbzOQ7kEr7OgXQPIdUhXWFAQN8Jrd-gkTfbl1ZAR2HjikcsW0MZI2do5UoTezTx6aBHfDhSwsx0SOCKOXHdWmyvofJts71OggEykKwEqY0AH3CHBro5v88luav9avT6Ha3nrtoJQKwKBxYLkC5RVwbsw7L6Br%7EV14-DYVOr1EIrDRifwMfNjtbDrx5eoJiXJF4mpngng5zcaDIofe1LUprJ8oOglZbD8IwmUJyW6JiRw__&Key-Pair-Id=KVTP0A1DKRTAX\r\n",
+      "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "108.156.22.119, 108.156.22.58, 108.156.22.7, ...\r\n",
+      "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|108.156.22.119|:443... connected.\r\n",
       "HTTP request sent, awaiting response... "
      ]
     },
@@ -238,7813 +251,34881 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "404 Not Found\r\n",
-      "2023-09-01 14:53:10 ERROR 404: Not Found.\r\n",
-      "\r\n"
+      "200 OK\r\n",
+      "Length: 2825976699 (2.6G) [binary/octet-stream]\r\n",
+      "Saving to: ‘v5-L96-D1024-E0_1-mem-ctx-4k.pth’\r\n",
+      "\r\n",
+      "\r",
+      "          v5-L96-D1   0%[                    ]       0  --.-KB/s               "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "total 4.0K\r\n",
-      "drwxr-xr-x  2 root root   10 Sep  1 14:53 .\r\n",
-      "drwxr-xr-x 19 root root 4.0K Sep  1 14:53 ..\r\n"
+      "\r",
+      "         v5-L96-D10   0%[                    ] 128.27K   472KB/s               "
      ]
-    }
-   ],
-   "source": [
-    "# Download the model directly (stop gap till HF sync issues is resolved)\n",
-    "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n",
-    "    wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-memory/{FILENAME_PREFIX}-mem-ctx-4k.pth\"\n",
-    "\n",
-    "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n",
-    "    ls -alh ."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "d36a2b7c",
-   "metadata": {
-    "papermill": {
-     "duration": 0.002214,
-     "end_time": "2023-09-01T14:53:10.839328",
-     "exception": false,
-     "start_time": "2023-09-01T14:53:10.837114",
-     "status": "completed"
     },
-    "tags": []
-   },
-   "source": [
-    "## Tune 6 : Ramping up the ctx size (8192), memory training\n",
-    "\n",
-    "- Tune 6: Large ctx size (8192), Scaling up!"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "7a59bf6c",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-09-01T14:53:10.845705Z",
-     "iopub.status.busy": "2023-09-01T14:53:10.844902Z",
-     "iopub.status.idle": "2023-09-01T14:53:17.948023Z",
-     "shell.execute_reply": "2023-09-01T14:53:17.947224Z"
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "        v5-L96-D102   0%[                    ]   1.18M  2.49MB/s               "
+     ]
     },
-    "papermill": {
-     "duration": 7.16737,
-     "end_time": "2023-09-01T14:53:18.008993",
-     "exception": false,
-     "start_time": "2023-09-01T14:53:10.841623",
-     "status": "completed"
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "       v5-L96-D1024   0%[                    ]   3.65M  5.41MB/s               "
+     ]
     },
-    "tags": []
-   },
-   "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Generating word reptition dataset ##\n"
+      "\r",
+      "      v5-L96-D1024-   0%[                    ]  10.10M  11.5MB/s               "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 88 samples (1 token repeat) - 30 max words - at ../dataset/shuffle-word-30-count.jsonl\n"
+      "\r",
+      "     v5-L96-D1024-E   0%[                    ]  20.48M  18.5MB/s               "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 49 samples (1 token repeat) - 50 max words - at ../dataset/shuffle-word-50-count.jsonl\n"
+      "\r",
+      "    v5-L96-D1024-E0   1%[                    ]  28.53M  21.6MB/s               "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 20 max words, 50 samples - at ../dataset/gen-word-20-count.jsonl\n"
+      "\r",
+      "   v5-L96-D1024-E0_   1%[                    ]  38.20M  25.0MB/s               "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2 max words, 50 samples - at ../dataset/word-2-count.jsonl\n"
+      "\r",
+      "  v5-L96-D1024-E0_1   1%[                    ]  45.14M  26.1MB/s               "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5 max words, 50 samples - at ../dataset/gen-word-5-count.jsonl\n"
+      "\r",
+      " v5-L96-D1024-E0_1-   1%[                    ]  53.31M  27.3MB/s               "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 15 max words, 50 samples - at ../dataset/gen-word-15-count.jsonl\n"
+      "\r",
+      "v5-L96-D1024-E0_1-m   2%[                    ]  61.80M  28.7MB/s               "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 10 max words, 50 samples - at ../dataset/gen-word-10-count.jsonl\n"
+      "\r",
+      "5-L96-D1024-E0_1-me   2%[                    ]  68.40M  29.1MB/s               "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 40 max words, 50 samples - at ../dataset/gen-word-40-count.jsonl\n"
+      "\r",
+      "-L96-D1024-E0_1-mem   2%[                    ]  78.76M  30.8MB/s               "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 25 max words, 50 samples - at ../dataset/gen-word-25-count.jsonl\n"
+      "\r",
+      "L96-D1024-E0_1-mem-   3%[                    ]  85.85M  31.1MB/s               "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 35 max words, 50 samples - at ../dataset/gen-word-35-count.jsonl\n"
+      "\r",
+      "96-D1024-E0_1-mem-c   3%[                    ]  95.06M  31.9MB/s               "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 55 max words, 50 samples - at ../dataset/gen-word-55-count.jsonl\n"
+      "\r",
+      "6-D1024-E0_1-mem-ct   3%[                    ] 103.00M  32.3MB/s    eta 80s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 48 samples (1 token repeat) - 55 max words - at ../dataset/shuffle-word-55-count.jsonl\n"
+      "\r",
+      "-D1024-E0_1-mem-ctx   4%[                    ] 110.60M  32.4MB/s    eta 80s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 42 samples (1 token repeat) - 60 max words - at ../dataset/shuffle-word-60-count.jsonl\n"
+      "\r",
+      "D1024-E0_1-mem-ctx-   4%[                    ] 121.50M  36.9MB/s    eta 80s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 31 samples (1 token repeat) - 80 max words - at ../dataset/shuffle-word-80-count.jsonl\n"
+      "\r",
+      "1024-E0_1-mem-ctx-4   4%[                    ] 130.07M  39.0MB/s    eta 80s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 45 max words, 50 samples - at ../dataset/gen-word-45-count.jsonl\n"
+      "\r",
+      "024-E0_1-mem-ctx-4k   5%[>                   ] 137.57M  39.9MB/s    eta 80s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 108 samples (1 token repeat) - 25 max words - at ../dataset/shuffle-word-25-count.jsonl\n"
+      "\r",
+      "24-E0_1-mem-ctx-4k.   5%[>                   ] 147.07M  40.5MB/s    eta 74s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 37 samples (1 token repeat) - 70 max words - at ../dataset/shuffle-word-70-count.jsonl\n"
+      "\r",
+      "4-E0_1-mem-ctx-4k.p   5%[>                   ] 156.38M  40.1MB/s    eta 74s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 58 samples (1 token repeat) - 45 max words - at ../dataset/shuffle-word-45-count.jsonl\n"
+      "\r",
+      "-E0_1-mem-ctx-4k.pt   6%[>                   ] 164.59M  40.0MB/s    eta 74s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 85 max words, 50 samples - at ../dataset/gen-word-85-count.jsonl\n"
+      "\r",
+      "E0_1-mem-ctx-4k.pth   6%[>                   ] 171.63M  39.2MB/s    eta 74s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 65 max words, 50 samples - at ../dataset/gen-word-65-count.jsonl\n"
+      "\r",
+      "0_1-mem-ctx-4k.pth    6%[>                   ] 180.27M  40.0MB/s    eta 74s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 63 samples (1 token repeat) - 40 max words - at ../dataset/shuffle-word-40-count.jsonl\n"
+      "\r",
+      "_1-mem-ctx-4k.pth     6%[>                   ] 187.18M  39.2MB/s    eta 71s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 130 max words, 50 samples - at ../dataset/gen-word-130-count.jsonl\n"
+      "\r",
+      "1-mem-ctx-4k.pth      7%[>                   ] 193.83M  39.0MB/s    eta 71s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 177 samples (1 token repeat) - 15 max words - at ../dataset/shuffle-word-15-count.jsonl\n"
+      "\r",
+      "-mem-ctx-4k.pth       7%[>                   ] 202.32M  39.3MB/s    eta 71s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 134 samples (1 token repeat) - 20 max words - at ../dataset/shuffle-word-20-count.jsonl\n"
+      "\r",
+      "mem-ctx-4k.pth        7%[>                   ] 210.41M  39.0MB/s    eta 71s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 77 samples (1 token repeat) - 35 max words - at ../dataset/shuffle-word-35-count.jsonl\n"
+      "\r",
+      "em-ctx-4k.pth         8%[>                   ] 219.40M  39.4MB/s    eta 71s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 44 samples (1 token repeat) - 65 max words - at ../dataset/shuffle-word-65-count.jsonl\n"
+      "\r",
+      "m-ctx-4k.pth          8%[>                   ] 226.92M  39.2MB/s    eta 69s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 60 max words, 50 samples - at ../dataset/gen-word-60-count.jsonl\n"
+      "\r",
+      "-ctx-4k.pth           8%[>                   ] 235.87M  39.2MB/s    eta 69s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 30 max words, 50 samples - at ../dataset/gen-word-30-count.jsonl\n"
+      "\r",
+      "ctx-4k.pth            9%[>                   ] 244.37M  39.2MB/s    eta 69s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 50 max words, 50 samples - at ../dataset/gen-word-50-count.jsonl\n"
+      "\r",
+      "tx-4k.pth             9%[>                   ] 252.94M  39.4MB/s    eta 69s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 28 samples (1 token repeat) - 95 max words - at ../dataset/shuffle-word-95-count.jsonl\n"
+      "\r",
+      "x-4k.pth              9%[>                   ] 260.46M  39.1MB/s    eta 69s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 90 max words, 50 samples - at ../dataset/gen-word-90-count.jsonl\n"
+      "\r",
+      "-4k.pth               9%[>                   ] 268.38M  39.0MB/s    eta 67s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 70 max words, 50 samples - at ../dataset/gen-word-70-count.jsonl\n"
+      "\r",
+      "4k.pth               10%[=>                  ] 278.50M  39.2MB/s    eta 67s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 38 samples (1 token repeat) - 75 max words - at ../dataset/shuffle-word-75-count.jsonl\n"
+      "\r",
+      "k.pth                10%[=>                  ] 286.82M  39.2MB/s    eta 67s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 17 samples (1 token repeat) - 125 max words - at ../dataset/shuffle-word-125-count.jsonl\n"
+      "\r",
+      ".pth                 10%[=>                  ] 294.98M  39.1MB/s    eta 67s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 75 max words, 50 samples - at ../dataset/gen-word-75-count.jsonl\n"
+      "\r",
+      "pth                  11%[=>                  ] 303.99M  40.1MB/s    eta 67s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 80 max words, 50 samples - at ../dataset/gen-word-80-count.jsonl\n"
+      "\r",
+      "th                   11%[=>                  ] 312.18M  39.9MB/s    eta 64s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 95 max words, 50 samples - at ../dataset/gen-word-95-count.jsonl\n"
+      "\r",
+      "h                    11%[=>                  ] 320.37M  40.7MB/s    eta 64s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (1 token repeat) - 110 max words - at ../dataset/shuffle-word-110-count.jsonl\n"
+      "\r",
+      "                     12%[=>                  ] 327.41M  40.2MB/s    eta 64s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 560 samples (1 token repeat) - 5 max words - at ../dataset/shuffle-word-5-count.jsonl\n"
+      "\r",
+      "                  v  12%[=>                  ] 334.85M  40.1MB/s    eta 64s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 19 samples (1 token repeat) - 115 max words - at ../dataset/shuffle-word-115-count.jsonl\n"
+      "\r",
+      "                 v5  12%[=>                  ] 343.29M  40.2MB/s    eta 64s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 258 samples (1 token repeat) - 10 max words - at ../dataset/shuffle-word-10-count.jsonl\n"
+      "\r",
+      "                v5-  12%[=>                  ] 350.02M  39.8MB/s    eta 63s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 17 samples (1 token repeat) - 130 max words - at ../dataset/shuffle-word-130-count.jsonl\n"
+      "\r",
+      "               v5-L  13%[=>                  ] 359.43M  40.0MB/s    eta 63s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 105 max words, 50 samples - at ../dataset/gen-word-105-count.jsonl\n"
+      "\r",
+      "              v5-L9  13%[=>                  ] 366.85M  39.7MB/s    eta 63s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 120 max words, 50 samples - at ../dataset/gen-word-120-count.jsonl\n"
+      "\r",
+      "             v5-L96  13%[=>                  ] 374.94M  39.6MB/s    eta 63s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 110 max words, 50 samples - at ../dataset/gen-word-110-count.jsonl\n"
+      "\r",
+      "            v5-L96-  14%[=>                  ] 383.94M  39.8MB/s    eta 63s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 100 max words, 50 samples - at ../dataset/gen-word-100-count.jsonl\n"
+      "\r",
+      "           v5-L96-D  14%[=>                  ] 389.22M  39.0MB/s    eta 62s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 32 samples (1 token repeat) - 90 max words - at ../dataset/shuffle-word-90-count.jsonl\n"
+      "\r",
+      "          v5-L96-D1  14%[=>                  ] 399.01M  39.5MB/s    eta 62s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 31 samples (1 token repeat) - 85 max words - at ../dataset/shuffle-word-85-count.jsonl\n"
+      "\r",
+      "         v5-L96-D10  14%[=>                  ] 400.87M  37.2MB/s    eta 62s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 140 max words, 50 samples - at ../dataset/gen-word-140-count.jsonl\n"
+      "\r",
+      "        v5-L96-D102  15%[==>                 ] 409.16M  37.3MB/s    eta 62s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 16 samples (1 token repeat) - 150 max words - at ../dataset/shuffle-word-150-count.jsonl\n"
+      "\r",
+      "       v5-L96-D1024  15%[==>                 ] 422.11M  38.5MB/s    eta 62s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 14 samples (1 token repeat) - 165 max words - at ../dataset/shuffle-word-165-count.jsonl\n"
+      "\r",
+      "      v5-L96-D1024-  15%[==>                 ] 430.96M  38.8MB/s    eta 60s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 180 max words, 50 samples - at ../dataset/gen-word-180-count.jsonl\n"
+      "\r",
+      "     v5-L96-D1024-E  16%[==>                 ] 438.90M  38.4MB/s    eta 60s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 16 samples (1 token repeat) - 145 max words - at ../dataset/shuffle-word-145-count.jsonl\n"
+      "\r",
+      "    v5-L96-D1024-E0  16%[==>                 ] 448.99M  39.2MB/s    eta 60s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 19 samples (1 token repeat) - 120 max words - at ../dataset/shuffle-word-120-count.jsonl\n"
+      "\r",
+      "   v5-L96-D1024-E0_  16%[==>                 ] 454.42M  38.5MB/s    eta 60s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (1 token repeat) - 105 max words - at ../dataset/shuffle-word-105-count.jsonl\n"
+      "\r",
+      "  v5-L96-D1024-E0_1  17%[==>                 ] 463.45M  39.3MB/s    eta 60s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 27 samples (1 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n"
+      "\r",
+      " v5-L96-D1024-E0_1-  17%[==>                 ] 472.14M  39.5MB/s    eta 59s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 135 max words, 50 samples - at ../dataset/gen-word-135-count.jsonl\n"
+      "\r",
+      "v5-L96-D1024-E0_1-m  17%[==>                 ] 480.86M  39.7MB/s    eta 59s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 115 max words, 50 samples - at ../dataset/gen-word-115-count.jsonl\n"
+      "\r",
+      "5-L96-D1024-E0_1-me  18%[==>                 ] 488.78M  39.6MB/s    eta 59s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 565 max words - at ../dataset/shuffle-word-565-count.jsonl\n"
+      "\r",
+      "-L96-D1024-E0_1-mem  18%[==>                 ] 498.34M  40.4MB/s    eta 59s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 17 samples (1 token repeat) - 135 max words - at ../dataset/shuffle-word-135-count.jsonl\n"
+      "\r",
+      "L96-D1024-E0_1-mem-  18%[==>                 ] 507.16M  40.0MB/s    eta 59s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 165 max words, 50 samples - at ../dataset/gen-word-165-count.jsonl\n"
+      "\r",
+      "96-D1024-E0_1-mem-c  19%[==>                 ] 515.72M  40.0MB/s    eta 57s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 11 samples (1 token repeat) - 205 max words - at ../dataset/shuffle-word-205-count.jsonl\n"
+      "\r",
+      "6-D1024-E0_1-mem-ct  19%[==>                 ] 523.23M  40.3MB/s    eta 57s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 145 max words, 50 samples - at ../dataset/gen-word-145-count.jsonl\n"
+      "\r",
+      "-D1024-E0_1-mem-ctx  19%[==>                 ] 530.88M  40.2MB/s    eta 57s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 305 max words, 50 samples - at ../dataset/gen-word-305-count.jsonl\n"
+      "\r",
+      "D1024-E0_1-mem-ctx-  20%[===>                ] 541.15M  42.6MB/s    eta 57s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 445 max words, 50 samples - at ../dataset/gen-word-445-count.jsonl\n"
+      "\r",
+      "1024-E0_1-mem-ctx-4  20%[===>                ] 549.61M  40.9MB/s    eta 57s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 15 samples (1 token repeat) - 155 max words - at ../dataset/shuffle-word-155-count.jsonl\n"
+      "\r",
+      "024-E0_1-mem-ctx-4k  20%[===>                ] 557.83M  40.4MB/s    eta 56s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 585 max words - at ../dataset/shuffle-word-585-count.jsonl\n"
+      "\r",
+      "24-E0_1-mem-ctx-4k.  21%[===>                ] 566.19M  40.5MB/s    eta 56s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 570 max words - at ../dataset/shuffle-word-570-count.jsonl\n"
+      "\r",
+      "4-E0_1-mem-ctx-4k.p  21%[===>                ] 576.30M  40.9MB/s    eta 56s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 520 max words - at ../dataset/shuffle-word-520-count.jsonl\n"
+      "\r",
+      "-E0_1-mem-ctx-4k.pt  21%[===>                ] 584.55M  40.6MB/s    eta 56s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 255 max words - at ../dataset/shuffle-word-255-count.jsonl\n"
+      "\r",
+      "E0_1-mem-ctx-4k.pth  22%[===>                ] 593.19M  40.8MB/s    eta 56s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 150 max words, 50 samples - at ../dataset/gen-word-150-count.jsonl\n"
+      "\r",
+      "0_1-mem-ctx-4k.pth   22%[===>                ] 601.02M  41.2MB/s    eta 55s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 155 max words, 50 samples - at ../dataset/gen-word-155-count.jsonl\n"
+      "\r",
+      "_1-mem-ctx-4k.pth    22%[===>                ] 607.94M  40.3MB/s    eta 55s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 14 samples (1 token repeat) - 175 max words - at ../dataset/shuffle-word-175-count.jsonl\n"
+      "\r",
+      "1-mem-ctx-4k.pth     22%[===>                ] 617.53M  40.9MB/s    eta 55s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 625 max words - at ../dataset/shuffle-word-625-count.jsonl\n"
+      "\r",
+      "-mem-ctx-4k.pth      23%[===>                ] 626.19M  41.1MB/s    eta 55s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 380 max words - at ../dataset/shuffle-word-380-count.jsonl\n"
+      "\r",
+      "mem-ctx-4k.pth       23%[===>                ] 631.94M  40.2MB/s    eta 55s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 470 max words - at ../dataset/shuffle-word-470-count.jsonl\n"
+      "\r",
+      "em-ctx-4k.pth        23%[===>                ] 642.25M  40.8MB/s    eta 53s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 280 max words - at ../dataset/shuffle-word-280-count.jsonl\n"
+      "\r",
+      "m-ctx-4k.pth         24%[===>                ] 650.46M  40.2MB/s    eta 53s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 15 samples (1 token repeat) - 170 max words - at ../dataset/shuffle-word-170-count.jsonl\n"
+      "\r",
+      "-ctx-4k.pth          24%[===>                ] 660.40M  41.1MB/s    eta 53s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 15 samples (1 token repeat) - 160 max words - at ../dataset/shuffle-word-160-count.jsonl\n"
+      "\r",
+      "ctx-4k.pth           24%[===>                ] 669.04M  41.3MB/s    eta 53s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 535 max words, 50 samples - at ../dataset/gen-word-535-count.jsonl\n"
+      "\r",
+      "tx-4k.pth            25%[====>               ] 677.22M  40.6MB/s    eta 53s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 745 max words, 50 samples - at ../dataset/gen-word-745-count.jsonl\n"
+      "\r",
+      "x-4k.pth             25%[====>               ] 685.40M  40.8MB/s    eta 52s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 490 max words, 50 samples - at ../dataset/gen-word-490-count.jsonl\n"
+      "\r",
+      "-4k.pth              25%[====>               ] 692.90M  40.2MB/s    eta 52s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 535 max words - at ../dataset/shuffle-word-535-count.jsonl\n"
+      "\r",
+      "4k.pth               26%[====>               ] 701.72M  40.5MB/s    eta 52s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 405 max words - at ../dataset/shuffle-word-405-count.jsonl\n"
+      "\r",
+      "k.pth                26%[====>               ] 710.47M  40.8MB/s    eta 52s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 475 max words - at ../dataset/shuffle-word-475-count.jsonl\n"
+      "\r",
+      ".pth                 26%[====>               ] 719.50M  40.4MB/s    eta 52s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 920 max words - at ../dataset/shuffle-word-920-count.jsonl\n"
+      "\r",
+      "pth                  26%[====>               ] 727.67M  40.6MB/s    eta 51s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 260 max words, 50 samples - at ../dataset/gen-word-260-count.jsonl\n"
+      "\r",
+      "th                   27%[====>               ] 735.40M  40.5MB/s    eta 51s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 405 max words, 50 samples - at ../dataset/gen-word-405-count.jsonl\n"
+      "\r",
+      "h                    27%[====>               ] 745.58M  41.2MB/s    eta 51s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 690 max words - at ../dataset/shuffle-word-690-count.jsonl\n"
+      "\r",
+      "                     27%[====>               ] 754.27M  41.0MB/s    eta 51s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 770 max words - at ../dataset/shuffle-word-770-count.jsonl\n"
+      "\r",
+      "                  v  28%[====>               ] 762.58M  40.9MB/s    eta 51s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 455 max words - at ../dataset/shuffle-word-455-count.jsonl\n"
+      "\r",
+      "                 v5  28%[====>               ] 770.36M  40.8MB/s    eta 50s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 950 max words - at ../dataset/shuffle-word-950-count.jsonl\n"
+      "\r",
+      "                v5-  28%[====>               ] 778.11M  40.5MB/s    eta 50s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 240 max words, 50 samples - at ../dataset/gen-word-240-count.jsonl\n"
+      "\r",
+      "               v5-L  29%[====>               ] 787.60M  40.9MB/s    eta 50s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 555 max words, 50 samples - at ../dataset/gen-word-555-count.jsonl\n"
+      "\r",
+      "              v5-L9  29%[====>               ] 795.51M  40.5MB/s    eta 50s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 365 max words - at ../dataset/shuffle-word-365-count.jsonl\n"
+      "\r",
+      "             v5-L96  29%[====>               ] 804.15M  40.9MB/s    eta 50s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 820 max words - at ../dataset/shuffle-word-820-count.jsonl\n"
+      "\r",
+      "            v5-L96-  30%[=====>              ] 811.94M  40.2MB/s    eta 48s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 10 samples (1 token repeat) - 215 max words - at ../dataset/shuffle-word-215-count.jsonl\n"
+      "\r",
+      "           v5-L96-D  30%[=====>              ] 819.92M  40.3MB/s    eta 48s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 555 max words - at ../dataset/shuffle-word-555-count.jsonl\n"
+      "\r",
+      "          v5-L96-D1  30%[=====>              ] 828.12M  40.4MB/s    eta 48s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 215 max words, 50 samples - at ../dataset/gen-word-215-count.jsonl\n"
+      "\r",
+      "         v5-L96-D10  31%[=====>              ] 837.98M  40.4MB/s    eta 48s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 175 max words, 50 samples - at ../dataset/gen-word-175-count.jsonl\n"
+      "\r",
+      "        v5-L96-D102  31%[=====>              ] 846.44M  40.3MB/s    eta 48s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 480 max words, 50 samples - at ../dataset/gen-word-480-count.jsonl\n"
+      "\r",
+      "       v5-L96-D1024  31%[=====>              ] 855.32M  40.7MB/s    eta 47s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 730 max words - at ../dataset/shuffle-word-730-count.jsonl\n"
+      "\r",
+      "      v5-L96-D1024-  32%[=====>              ] 863.40M  40.7MB/s    eta 47s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3700 max words - at ../dataset/shuffle-word-3700-count.jsonl\n"
+      "\r",
+      "     v5-L96-D1024-E  32%[=====>              ] 871.21M  40.2MB/s    eta 47s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 340 max words, 50 samples - at ../dataset/gen-word-340-count.jsonl\n"
+      "\r",
+      "    v5-L96-D1024-E0  32%[=====>              ] 880.97M  41.1MB/s    eta 47s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 710 max words - at ../dataset/shuffle-word-710-count.jsonl\n"
+      "\r",
+      "   v5-L96-D1024-E0_  32%[=====>              ] 889.22M  40.5MB/s    eta 47s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 890 max words, 50 samples - at ../dataset/gen-word-890-count.jsonl\n"
+      "\r",
+      "  v5-L96-D1024-E0_1  33%[=====>              ] 897.03M  40.2MB/s    eta 46s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 10 samples (1 token repeat) - 250 max words - at ../dataset/shuffle-word-250-count.jsonl\n"
+      "\r",
+      " v5-L96-D1024-E0_1-  33%[=====>              ] 898.93M  38.4MB/s    eta 46s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 370 max words - at ../dataset/shuffle-word-370-count.jsonl\n"
+      "\r",
+      "v5-L96-D1024-E0_1-m  33%[=====>              ] 915.07M  40.6MB/s    eta 46s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 295 max words - at ../dataset/shuffle-word-295-count.jsonl\n"
+      "\r",
+      "5-L96-D1024-E0_1-me  34%[=====>              ] 924.14M  40.6MB/s    eta 46s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 230 max words, 50 samples - at ../dataset/gen-word-230-count.jsonl\n"
+      "\r",
+      "-L96-D1024-E0_1-mem  34%[=====>              ] 932.36M  40.8MB/s    eta 46s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 430 max words - at ../dataset/shuffle-word-430-count.jsonl\n"
+      "\r",
+      "L96-D1024-E0_1-mem-  34%[=====>              ] 941.37M  40.7MB/s    eta 45s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 695 max words, 50 samples - at ../dataset/gen-word-695-count.jsonl\n"
+      "\r",
+      "96-D1024-E0_1-mem-c  35%[======>             ] 949.66M  40.9MB/s    eta 45s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 270 max words - at ../dataset/shuffle-word-270-count.jsonl\n"
+      "\r",
+      "6-D1024-E0_1-mem-ct  35%[======>             ] 958.50M  40.8MB/s    eta 45s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 865 max words - at ../dataset/shuffle-word-865-count.jsonl\n"
+      "\r",
+      "-D1024-E0_1-mem-ctx  35%[======>             ] 966.79M  40.7MB/s    eta 45s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 560 max words - at ../dataset/shuffle-word-560-count.jsonl\n"
+      "\r",
+      "D1024-E0_1-mem-ctx-  36%[======>             ] 976.33M  41.2MB/s    eta 45s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 665 max words - at ../dataset/shuffle-word-665-count.jsonl\n"
+      "\r",
+      "1024-E0_1-mem-ctx-4  36%[======>             ] 985.30M  41.5MB/s    eta 44s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 550 max words - at ../dataset/shuffle-word-550-count.jsonl\n"
+      "\r",
+      "024-E0_1-mem-ctx-4k  36%[======>             ] 993.33M  41.0MB/s    eta 44s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 735 max words - at ../dataset/shuffle-word-735-count.jsonl\n"
+      "\r",
+      "24-E0_1-mem-ctx-4k.  37%[======>             ]   1002M  41.1MB/s    eta 44s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 645 max words, 50 samples - at ../dataset/gen-word-645-count.jsonl\n"
+      "\r",
+      "4-E0_1-mem-ctx-4k.p  37%[======>             ]   1011M  41.0MB/s    eta 44s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 755 max words - at ../dataset/shuffle-word-755-count.jsonl\n"
+      "\r",
+      "-E0_1-mem-ctx-4k.pt  37%[======>             ]   1019M  40.9MB/s    eta 44s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 485 max words - at ../dataset/shuffle-word-485-count.jsonl\n"
+      "\r",
+      "E0_1-mem-ctx-4k.pth  38%[======>             ]   1.00G  40.9MB/s    eta 42s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 440 max words - at ../dataset/shuffle-word-440-count.jsonl\n"
+      "\r",
+      "0_1-mem-ctx-4k.pth   38%[======>             ]   1.01G  42.5MB/s    eta 42s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 255 max words, 50 samples - at ../dataset/gen-word-255-count.jsonl\n"
+      "\r",
+      "_1-mem-ctx-4k.pth    38%[======>             ]   1.02G  41.9MB/s    eta 42s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 610 max words, 50 samples - at ../dataset/gen-word-610-count.jsonl\n"
+      "\r",
+      "1-mem-ctx-4k.pth     39%[======>             ]   1.03G  40.6MB/s    eta 42s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 860 max words, 50 samples - at ../dataset/gen-word-860-count.jsonl\n"
+      "\r",
+      "-mem-ctx-4k.pth      39%[======>             ]   1.04G  40.4MB/s    eta 42s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 320 max words, 50 samples - at ../dataset/gen-word-320-count.jsonl\n"
+      "\r",
+      "mem-ctx-4k.pth       39%[======>             ]   1.04G  39.5MB/s    eta 41s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 390 max words, 50 samples - at ../dataset/gen-word-390-count.jsonl\n"
+      "\r",
+      "em-ctx-4k.pth        40%[=======>            ]   1.05G  40.4MB/s    eta 41s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 415 max words - at ../dataset/shuffle-word-415-count.jsonl\n"
+      "\r",
+      "m-ctx-4k.pth         40%[=======>            ]   1.06G  40.0MB/s    eta 41s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 260 max words - at ../dataset/shuffle-word-260-count.jsonl\n"
+      "\r",
+      "-ctx-4k.pth          40%[=======>            ]   1.07G  40.5MB/s    eta 41s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 925 max words, 50 samples - at ../dataset/gen-word-925-count.jsonl\n"
+      "\r",
+      "ctx-4k.pth           40%[=======>            ]   1.08G  40.1MB/s    eta 41s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 345 max words - at ../dataset/shuffle-word-345-count.jsonl\n"
+      "\r",
+      "tx-4k.pth            41%[=======>            ]   1.09G  40.8MB/s    eta 40s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 675 max words, 50 samples - at ../dataset/gen-word-675-count.jsonl\n"
+      "\r",
+      "x-4k.pth             41%[=======>            ]   1.09G  40.6MB/s    eta 40s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 915 max words, 50 samples - at ../dataset/gen-word-915-count.jsonl\n"
+      "\r",
+      "-4k.pth              41%[=======>            ]   1.10G  40.2MB/s    eta 40s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "4k.pth               42%[=======>            ]   1.11G  40.0MB/s    eta 40s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "k.pth                42%[=======>            ]   1.12G  39.8MB/s    eta 40s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      ".pth                 42%[=======>            ]   1.12G  39.2MB/s    eta 39s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "pth                  43%[=======>            ]   1.13G  39.1MB/s    eta 39s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "th                   43%[=======>            ]   1.14G  38.9MB/s    eta 39s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "h                    43%[=======>            ]   1.15G  39.3MB/s    eta 39s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                     44%[=======>            ]   1.16G  38.9MB/s    eta 39s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                  v  44%[=======>            ]   1.17G  39.4MB/s    eta 38s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                 v5  44%[=======>            ]   1.17G  39.8MB/s    eta 38s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                v5-  44%[=======>            ]   1.18G  39.8MB/s    eta 38s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "               v5-L  45%[========>           ]   1.19G  39.6MB/s    eta 38s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "              v5-L9  45%[========>           ]   1.20G  39.7MB/s    eta 38s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "             v5-L96  45%[========>           ]   1.21G  39.9MB/s    eta 37s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "            v5-L96-  46%[========>           ]   1.22G  39.7MB/s    eta 37s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "           v5-L96-D  46%[========>           ]   1.23G  39.2MB/s    eta 37s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "          v5-L96-D1  46%[========>           ]   1.23G  39.5MB/s    eta 37s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "         v5-L96-D10  47%[========>           ]   1.24G  39.9MB/s    eta 37s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "        v5-L96-D102  47%[========>           ]   1.25G  39.6MB/s    eta 36s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "       v5-L96-D1024  47%[========>           ]   1.26G  39.9MB/s    eta 36s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "      v5-L96-D1024-  48%[========>           ]   1.27G  40.6MB/s    eta 36s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "     v5-L96-D1024-E  48%[========>           ]   1.27G  40.1MB/s    eta 36s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "    v5-L96-D1024-E0  48%[========>           ]   1.28G  40.5MB/s    eta 36s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "   v5-L96-D1024-E0_  49%[========>           ]   1.29G  40.5MB/s    eta 35s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "  v5-L96-D1024-E0_1  49%[========>           ]   1.30G  40.6MB/s    eta 35s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      " v5-L96-D1024-E0_1-  49%[========>           ]   1.31G  40.0MB/s    eta 35s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "v5-L96-D1024-E0_1-m  49%[========>           ]   1.31G  37.9MB/s    eta 35s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "5-L96-D1024-E0_1-me  49%[========>           ]   1.31G  37.3MB/s    eta 35s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-L96-D1024-E0_1-mem  50%[=========>          ]   1.33G  39.3MB/s    eta 34s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "L96-D1024-E0_1-mem-  50%[=========>          ]   1.34G  38.9MB/s    eta 34s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "96-D1024-E0_1-mem-c  51%[=========>          ]   1.34G  38.8MB/s    eta 34s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "6-D1024-E0_1-mem-ct  51%[=========>          ]   1.35G  39.0MB/s    eta 34s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-D1024-E0_1-mem-ctx  51%[=========>          ]   1.36G  39.1MB/s    eta 34s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "D1024-E0_1-mem-ctx-  52%[=========>          ]   1.37G  38.5MB/s    eta 33s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "1024-E0_1-mem-ctx-4  52%[=========>          ]   1.38G  39.2MB/s    eta 33s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "024-E0_1-mem-ctx-4k  52%[=========>          ]   1.39G  39.4MB/s    eta 33s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "24-E0_1-mem-ctx-4k.  53%[=========>          ]   1.40G  39.5MB/s    eta 33s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "4-E0_1-mem-ctx-4k.p  53%[=========>          ]   1.40G  39.4MB/s    eta 33s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-E0_1-mem-ctx-4k.pt  53%[=========>          ]   1.41G  39.4MB/s    eta 32s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "E0_1-mem-ctx-4k.pth  53%[=========>          ]   1.42G  39.0MB/s    eta 32s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0_1-mem-ctx-4k.pth   54%[=========>          ]   1.43G  40.3MB/s    eta 32s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "_1-mem-ctx-4k.pth    54%[=========>          ]   1.44G  40.6MB/s    eta 32s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "1-mem-ctx-4k.pth     54%[=========>          ]   1.44G  42.8MB/s    eta 32s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-mem-ctx-4k.pth      55%[==========>         ]   1.45G  42.5MB/s    eta 31s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "mem-ctx-4k.pth       55%[==========>         ]   1.46G  40.5MB/s    eta 31s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "em-ctx-4k.pth        55%[==========>         ]   1.47G  40.7MB/s    eta 31s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "m-ctx-4k.pth         56%[==========>         ]   1.48G  40.5MB/s    eta 31s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-ctx-4k.pth          56%[==========>         ]   1.48G  39.5MB/s    eta 31s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "ctx-4k.pth           56%[==========>         ]   1.49G  39.4MB/s    eta 30s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "tx-4k.pth            57%[==========>         ]   1.50G  40.0MB/s    eta 30s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "x-4k.pth             57%[==========>         ]   1.51G  39.8MB/s    eta 30s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-4k.pth              57%[==========>         ]   1.52G  39.9MB/s    eta 30s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "4k.pth               57%[==========>         ]   1.53G  39.6MB/s    eta 30s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "k.pth                58%[==========>         ]   1.54G  40.3MB/s    eta 28s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      ".pth                 58%[==========>         ]   1.54G  39.8MB/s    eta 28s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "pth                  58%[==========>         ]   1.55G  40.0MB/s    eta 28s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "th                   59%[==========>         ]   1.56G  39.6MB/s    eta 28s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "h                    59%[==========>         ]   1.57G  39.9MB/s    eta 28s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                     59%[==========>         ]   1.58G  39.5MB/s    eta 27s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                  v  60%[===========>        ]   1.58G  39.8MB/s    eta 27s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                 v5  60%[===========>        ]   1.59G  39.8MB/s    eta 27s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                v5-  60%[===========>        ]   1.60G  39.9MB/s    eta 27s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "               v5-L  61%[===========>        ]   1.61G  40.0MB/s    eta 27s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "              v5-L9  61%[===========>        ]   1.62G  40.5MB/s    eta 26s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "             v5-L96  61%[===========>        ]   1.63G  40.7MB/s    eta 26s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "            v5-L96-  62%[===========>        ]   1.63G  40.6MB/s    eta 26s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "           v5-L96-D  62%[===========>        ]   1.64G  41.2MB/s    eta 26s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "          v5-L96-D1  62%[===========>        ]   1.65G  41.0MB/s    eta 26s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "         v5-L96-D10  63%[===========>        ]   1.66G  40.7MB/s    eta 25s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "        v5-L96-D102  63%[===========>        ]   1.67G  40.7MB/s    eta 25s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "       v5-L96-D1024  63%[===========>        ]   1.67G  40.2MB/s    eta 25s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "      v5-L96-D1024-  63%[===========>        ]   1.68G  40.6MB/s    eta 25s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "     v5-L96-D1024-E  64%[===========>        ]   1.69G  40.3MB/s    eta 25s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "    v5-L96-D1024-E0  64%[===========>        ]   1.70G  40.5MB/s    eta 24s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "   v5-L96-D1024-E0_  64%[===========>        ]   1.71G  40.5MB/s    eta 24s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "  v5-L96-D1024-E0_1  65%[============>       ]   1.72G  40.9MB/s    eta 24s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      " v5-L96-D1024-E0_1-  65%[============>       ]   1.73G  40.6MB/s    eta 24s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "v5-L96-D1024-E0_1-m  65%[============>       ]   1.73G  40.4MB/s    eta 24s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "5-L96-D1024-E0_1-me  66%[============>       ]   1.74G  39.9MB/s    eta 23s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-L96-D1024-E0_1-mem  66%[============>       ]   1.75G  39.8MB/s    eta 23s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "L96-D1024-E0_1-mem-  66%[============>       ]   1.75G  37.0MB/s    eta 23s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "96-D1024-E0_1-mem-c  67%[============>       ]   1.76G  39.3MB/s    eta 23s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "6-D1024-E0_1-mem-ct  67%[============>       ]   1.77G  39.7MB/s    eta 23s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-D1024-E0_1-mem-ctx  67%[============>       ]   1.78G  38.8MB/s    eta 22s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "D1024-E0_1-mem-ctx-  67%[============>       ]   1.79G  38.9MB/s    eta 22s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "1024-E0_1-mem-ctx-4  68%[============>       ]   1.79G  38.7MB/s    eta 22s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "024-E0_1-mem-ctx-4k  68%[============>       ]   1.81G  39.0MB/s    eta 22s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "24-E0_1-mem-ctx-4k.  68%[============>       ]   1.81G  39.4MB/s    eta 22s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "4-E0_1-mem-ctx-4k.p  69%[============>       ]   1.82G  39.3MB/s    eta 21s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-E0_1-mem-ctx-4k.pt  69%[============>       ]   1.83G  38.9MB/s    eta 21s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "E0_1-mem-ctx-4k.pth  69%[============>       ]   1.84G  39.5MB/s    eta 21s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0_1-mem-ctx-4k.pth   70%[=============>      ]   1.85G  38.9MB/s    eta 21s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "_1-mem-ctx-4k.pth    70%[=============>      ]   1.85G  39.1MB/s    eta 21s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "1-mem-ctx-4k.pth     70%[=============>      ]   1.86G  38.3MB/s    eta 20s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-mem-ctx-4k.pth      70%[=============>      ]   1.87G  38.7MB/s    eta 20s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "mem-ctx-4k.pth       71%[=============>      ]   1.87G  38.1MB/s    eta 20s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "em-ctx-4k.pth        71%[=============>      ]   1.88G  38.3MB/s    eta 20s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "m-ctx-4k.pth         71%[=============>      ]   1.89G  39.9MB/s    eta 20s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-ctx-4k.pth          72%[=============>      ]   1.90G  38.3MB/s    eta 19s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "ctx-4k.pth           72%[=============>      ]   1.91G  38.7MB/s    eta 19s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "tx-4k.pth            72%[=============>      ]   1.91G  38.6MB/s    eta 19s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "x-4k.pth             73%[=============>      ]   1.92G  39.5MB/s    eta 19s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-4k.pth              73%[=============>      ]   1.93G  39.2MB/s    eta 19s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "4k.pth               73%[=============>      ]   1.94G  38.9MB/s    eta 18s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "k.pth                74%[=============>      ]   1.95G  39.1MB/s    eta 18s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      ".pth                 74%[=============>      ]   1.96G  39.1MB/s    eta 18s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "pth                  74%[=============>      ]   1.97G  39.4MB/s    eta 18s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "th                   75%[==============>     ]   1.97G  38.7MB/s    eta 18s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "h                    75%[==============>     ]   1.98G  39.3MB/s    eta 17s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                     75%[==============>     ]   1.99G  40.3MB/s    eta 17s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                  v  76%[==============>     ]   2.00G  40.0MB/s    eta 17s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                 v5  76%[==============>     ]   2.01G  40.8MB/s    eta 17s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                v5-  76%[==============>     ]   2.02G  40.8MB/s    eta 17s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "               v5-L  77%[==============>     ]   2.03G  40.8MB/s    eta 16s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "              v5-L9  77%[==============>     ]   2.03G  41.2MB/s    eta 16s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "             v5-L96  77%[==============>     ]   2.04G  41.2MB/s    eta 16s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "            v5-L96-  77%[==============>     ]   2.05G  41.8MB/s    eta 16s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "           v5-L96-D  78%[==============>     ]   2.06G  40.4MB/s    eta 16s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "          v5-L96-D1  78%[==============>     ]   2.06G  39.9MB/s    eta 15s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "         v5-L96-D10  78%[==============>     ]   2.08G  41.5MB/s    eta 15s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "        v5-L96-D102  79%[==============>     ]   2.08G  40.8MB/s    eta 15s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "       v5-L96-D1024  79%[==============>     ]   2.09G  40.5MB/s    eta 15s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "      v5-L96-D1024-  79%[==============>     ]   2.10G  39.8MB/s    eta 15s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "     v5-L96-D1024-E  80%[===============>    ]   2.11G  40.0MB/s    eta 14s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "    v5-L96-D1024-E0  80%[===============>    ]   2.12G  39.9MB/s    eta 14s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "   v5-L96-D1024-E0_  80%[===============>    ]   2.12G  40.2MB/s    eta 14s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "  v5-L96-D1024-E0_1  81%[===============>    ]   2.13G  39.1MB/s    eta 14s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      " v5-L96-D1024-E0_1-  81%[===============>    ]   2.14G  39.9MB/s    eta 14s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "v5-L96-D1024-E0_1-m  81%[===============>    ]   2.15G  39.5MB/s    eta 13s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "5-L96-D1024-E0_1-me  82%[===============>    ]   2.16G  39.8MB/s    eta 13s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-L96-D1024-E0_1-mem  82%[===============>    ]   2.17G  39.3MB/s    eta 13s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "L96-D1024-E0_1-mem-  82%[===============>    ]   2.17G  39.2MB/s    eta 13s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "96-D1024-E0_1-mem-c  82%[===============>    ]   2.18G  39.5MB/s    eta 13s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "6-D1024-E0_1-mem-ct  83%[===============>    ]   2.19G  39.7MB/s    eta 11s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-D1024-E0_1-mem-ctx  83%[===============>    ]   2.20G  40.6MB/s    eta 11s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "D1024-E0_1-mem-ctx-  83%[===============>    ]   2.21G  39.8MB/s    eta 11s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "1024-E0_1-mem-ctx-4  84%[===============>    ]   2.21G  39.9MB/s    eta 11s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "024-E0_1-mem-ctx-4k  84%[===============>    ]   2.22G  40.4MB/s    eta 11s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "24-E0_1-mem-ctx-4k.  84%[===============>    ]   2.23G  40.4MB/s    eta 10s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "4-E0_1-mem-ctx-4k.p  85%[================>   ]   2.24G  40.8MB/s    eta 10s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-E0_1-mem-ctx-4k.pt  85%[================>   ]   2.25G  40.5MB/s    eta 10s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "E0_1-mem-ctx-4k.pth  85%[================>   ]   2.26G  40.4MB/s    eta 10s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0_1-mem-ctx-4k.pth   86%[================>   ]   2.27G  40.8MB/s    eta 10s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "_1-mem-ctx-4k.pth    86%[================>   ]   2.27G  41.0MB/s    eta 9s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "1-mem-ctx-4k.pth     86%[================>   ]   2.28G  40.1MB/s    eta 9s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-mem-ctx-4k.pth      87%[================>   ]   2.29G  40.1MB/s    eta 9s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "mem-ctx-4k.pth       87%[================>   ]   2.30G  40.0MB/s    eta 9s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "em-ctx-4k.pth        87%[================>   ]   2.31G  40.4MB/s    eta 9s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "m-ctx-4k.pth         87%[================>   ]   2.31G  40.1MB/s    eta 8s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-ctx-4k.pth          88%[================>   ]   2.32G  39.9MB/s    eta 8s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "ctx-4k.pth           88%[================>   ]   2.33G  40.0MB/s    eta 8s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "tx-4k.pth            88%[================>   ]   2.34G  39.2MB/s    eta 8s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "x-4k.pth             89%[================>   ]   2.35G  39.7MB/s    eta 8s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-4k.pth              89%[================>   ]   2.36G  39.4MB/s    eta 7s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "4k.pth               89%[================>   ]   2.36G  39.4MB/s    eta 7s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "k.pth                90%[=================>  ]   2.37G  39.6MB/s    eta 7s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      ".pth                 90%[=================>  ]   2.38G  39.4MB/s    eta 7s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "pth                  90%[=================>  ]   2.39G  39.2MB/s    eta 7s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "th                   91%[=================>  ]   2.40G  39.2MB/s    eta 6s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "h                    91%[=================>  ]   2.41G  39.4MB/s    eta 6s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                     91%[=================>  ]   2.41G  40.0MB/s    eta 6s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                  v  92%[=================>  ]   2.42G  39.8MB/s    eta 6s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                 v5  92%[=================>  ]   2.43G  39.7MB/s    eta 6s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                v5-  92%[=================>  ]   2.44G  39.5MB/s    eta 5s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "               v5-L  92%[=================>  ]   2.45G  40.6MB/s    eta 5s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "              v5-L9  93%[=================>  ]   2.45G  39.8MB/s    eta 5s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "             v5-L96  93%[=================>  ]   2.46G  39.0MB/s    eta 5s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "            v5-L96-  93%[=================>  ]   2.46G  37.8MB/s    eta 5s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "           v5-L96-D  94%[=================>  ]   2.48G  40.1MB/s    eta 4s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "          v5-L96-D1  94%[=================>  ]   2.49G  40.3MB/s    eta 4s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "         v5-L96-D10  94%[=================>  ]   2.50G  40.4MB/s    eta 4s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "        v5-L96-D102  95%[==================> ]   2.51G  40.3MB/s    eta 4s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "       v5-L96-D1024  95%[==================> ]   2.51G  40.4MB/s    eta 4s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "      v5-L96-D1024-  95%[==================> ]   2.52G  41.1MB/s    eta 3s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "     v5-L96-D1024-E  96%[==================> ]   2.53G  40.8MB/s    eta 3s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "    v5-L96-D1024-E0  96%[==================> ]   2.54G  40.5MB/s    eta 3s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "   v5-L96-D1024-E0_  96%[==================> ]   2.55G  40.5MB/s    eta 3s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "  v5-L96-D1024-E0_1  97%[==================> ]   2.55G  40.3MB/s    eta 3s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      " v5-L96-D1024-E0_1-  97%[==================> ]   2.56G  40.3MB/s    eta 2s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "v5-L96-D1024-E0_1-m  97%[==================> ]   2.57G  40.5MB/s    eta 2s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "5-L96-D1024-E0_1-me  98%[==================> ]   2.58G  40.9MB/s    eta 2s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-L96-D1024-E0_1-mem  98%[==================> ]   2.59G  40.8MB/s    eta 2s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "L96-D1024-E0_1-mem-  98%[==================> ]   2.60G  42.5MB/s    eta 2s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "96-D1024-E0_1-mem-c  99%[==================> ]   2.61G  42.8MB/s    eta 1s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "6-D1024-E0_1-mem-ct  99%[==================> ]   2.61G  40.8MB/s    eta 1s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-D1024-E0_1-mem-ctx  99%[==================> ]   2.62G  40.9MB/s    eta 1s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "D1024-E0_1-mem-ctx-  99%[==================> ]   2.63G  40.8MB/s    eta 1s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "v5-L96-D1024-E0_1-m 100%[===================>]   2.63G  41.1MB/s    in 68s     \r\n",
+      "\r\n",
+      "2023-09-02 06:17:41 (39.7 MB/s) - ‘v5-L96-D1024-E0_1-mem-ctx-4k.pth’ saved [2825976699/2825976699]\r\n",
+      "\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "total 2.7G\r\n",
+      "drwxr-xr-x  2 root root   54 Sep  2 06:16 .\r\n",
+      "drwxr-xr-x 19 root root 4.0K Sep  2 06:16 ..\r\n",
+      "-rw-r--r--  1 root root 2.7G Sep  2 05:37 v5-L96-D1024-E0_1-mem-ctx-4k.pth\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Download the model directly (stopgap until the HF sync issue is resolved)\n",
+    "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n",
+    "    wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-memory/{FILENAME_PREFIX}-mem-ctx-4k.pth\"\n",
+    "\n",
+    "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n",
+    "    ls -alh ."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "adf68d89",
+   "metadata": {
+    "papermill": {
+     "duration": 0.01742,
+     "end_time": "2023-09-02T06:17:41.804733",
+     "exception": false,
+     "start_time": "2023-09-02T06:17:41.787313",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "## Tune 6 : Ramping up the ctx size (8192), memory training\n",
+    "\n",
+    "- Tune 6: Large ctx size (8192), Scaling up!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "51c58e54",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-09-02T06:17:41.841382Z",
+     "iopub.status.busy": "2023-09-02T06:17:41.841191Z",
+     "iopub.status.idle": "2023-09-02T06:17:48.969203Z",
+     "shell.execute_reply": "2023-09-02T06:17:48.967550Z"
+    },
+    "papermill": {
+     "duration": 7.217868,
+     "end_time": "2023-09-02T06:17:49.040024",
+     "exception": false,
+     "start_time": "2023-09-02T06:17:41.822156",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "## Generating word reptition dataset ##\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2 max words, 50 samples - at ../dataset/word-2-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 10 max words, 50 samples - at ../dataset/gen-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 35 max words, 50 samples - at ../dataset/gen-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 566 samples (1 token repeat) - 5 max words - at ../dataset/shuffle-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5 max words, 50 samples - at ../dataset/gen-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 60 max words, 50 samples - at ../dataset/gen-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 30 max words, 50 samples - at ../dataset/gen-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 84 samples (1 token repeat) - 30 max words - at ../dataset/shuffle-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 77 samples (1 token repeat) - 35 max words - at ../dataset/shuffle-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 25 max words, 50 samples - at ../dataset/gen-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 45 max words, 50 samples - at ../dataset/gen-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 63 samples (1 token repeat) - 40 max words - at ../dataset/shuffle-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 50 max words, 50 samples - at ../dataset/gen-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 55 max words, 50 samples - at ../dataset/gen-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 15 max words, 50 samples - at ../dataset/gen-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 120 max words, 50 samples - at ../dataset/gen-word-120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 129 samples (1 token repeat) - 20 max words - at ../dataset/shuffle-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 56 samples (1 token repeat) - 50 max words - at ../dataset/shuffle-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 264 samples (1 token repeat) - 10 max words - at ../dataset/shuffle-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 40 max words, 50 samples - at ../dataset/gen-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 189 samples (1 token repeat) - 15 max words - at ../dataset/shuffle-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 19 samples (1 token repeat) - 115 max words - at ../dataset/shuffle-word-115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 58 samples (1 token repeat) - 45 max words - at ../dataset/shuffle-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 20 max words, 50 samples - at ../dataset/gen-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (1 token repeat) - 110 max words - at ../dataset/shuffle-word-110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 220 max words - at ../dataset/shuffle-word-220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 110 max words, 50 samples - at ../dataset/gen-word-110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 50 samples (1 token repeat) - 55 max words - at ../dataset/shuffle-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 105 max words, 50 samples - at ../dataset/gen-word-105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 85 max words, 50 samples - at ../dataset/gen-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 100 max words, 50 samples - at ../dataset/gen-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 235 max words, 50 samples - at ../dataset/gen-word-235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 19 samples (1 token repeat) - 120 max words - at ../dataset/shuffle-word-120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 140 max words, 50 samples - at ../dataset/gen-word-140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 125 max words, 50 samples - at ../dataset/gen-word-125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 80 max words, 50 samples - at ../dataset/gen-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 95 max words, 50 samples - at ../dataset/gen-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 105 samples (1 token repeat) - 25 max words - at ../dataset/shuffle-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 215 max words - at ../dataset/shuffle-word-215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 265 max words - at ../dataset/shuffle-word-265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 21 samples (1 token repeat) - 105 max words - at ../dataset/shuffle-word-105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 310 max words, 50 samples - at ../dataset/gen-word-310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 70 max words, 50 samples - at ../dataset/gen-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 115 max words, 50 samples - at ../dataset/gen-word-115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 590 max words - at ../dataset/shuffle-word-590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 200 max words, 50 samples - at ../dataset/gen-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 595 max words - at ../dataset/shuffle-word-595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 255 max words - at ../dataset/shuffle-word-255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 175 max words, 50 samples - at ../dataset/gen-word-175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 26 samples (1 token repeat) - 95 max words - at ../dataset/shuffle-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 450 max words - at ../dataset/shuffle-word-450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 270 max words, 50 samples - at ../dataset/gen-word-270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 44 samples (1 token repeat) - 60 max words - at ../dataset/shuffle-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 415 max words, 50 samples - at ../dataset/gen-word-415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 585 max words, 50 samples - at ../dataset/gen-word-585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 350 max words - at ../dataset/shuffle-word-350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 430 max words, 50 samples - at ../dataset/gen-word-430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 17 samples (1 token repeat) - 135 max words - at ../dataset/shuffle-word-135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 525 max words - at ../dataset/shuffle-word-525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 530 max words - at ../dataset/shuffle-word-530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 8 samples (1 token repeat) - 305 max words - at ../dataset/shuffle-word-305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 11 samples (1 token repeat) - 210 max words - at ../dataset/shuffle-word-210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 285 max words, 50 samples - at ../dataset/gen-word-285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 330 max words, 50 samples - at ../dataset/gen-word-330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 495 max words - at ../dataset/shuffle-word-495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 400 max words, 50 samples - at ../dataset/gen-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 240 max words - at ../dataset/shuffle-word-240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 580 max words - at ../dataset/shuffle-word-580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 520 max words, 50 samples - at ../dataset/gen-word-520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 370 max words, 50 samples - at ../dataset/gen-word-370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 425 max words, 50 samples - at ../dataset/gen-word-425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 365 max words - at ../dataset/shuffle-word-365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 225 max words - at ../dataset/shuffle-word-225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 230 max words - at ../dataset/shuffle-word-230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 290 max words, 50 samples - at ../dataset/gen-word-290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 400 max words - at ../dataset/shuffle-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 445 max words - at ../dataset/shuffle-word-445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 180 max words, 50 samples - at ../dataset/gen-word-180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 65 max words, 50 samples - at ../dataset/gen-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 545 max words - at ../dataset/shuffle-word-545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 18 samples (1 token repeat) - 130 max words - at ../dataset/shuffle-word-130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 560 max words, 50 samples - at ../dataset/gen-word-560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 480 max words, 50 samples - at ../dataset/gen-word-480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 32 samples (1 token repeat) - 85 max words - at ../dataset/shuffle-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 36 samples (1 token repeat) - 75 max words - at ../dataset/shuffle-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 15 samples (1 token repeat) - 155 max words - at ../dataset/shuffle-word-155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 800 max words - at ../dataset/shuffle-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 365 max words, 50 samples - at ../dataset/gen-word-365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 600 max words - at ../dataset/shuffle-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 18 samples (1 token repeat) - 125 max words - at ../dataset/shuffle-word-125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 16 samples (1 token repeat) - 140 max words - at ../dataset/shuffle-word-140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 155 max words, 50 samples - at ../dataset/gen-word-155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 130 max words, 50 samples - at ../dataset/gen-word-130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 445 max words, 50 samples - at ../dataset/gen-word-445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 470 max words - at ../dataset/shuffle-word-470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 580 max words, 50 samples - at ../dataset/gen-word-580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 610 max words - at ../dataset/shuffle-word-610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 405 max words, 50 samples - at ../dataset/gen-word-405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 490 max words - at ../dataset/shuffle-word-490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 630 max words - at ../dataset/shuffle-word-630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 755 max words, 50 samples - at ../dataset/gen-word-755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 255 max words, 50 samples - at ../dataset/gen-word-255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 745 max words - at ../dataset/shuffle-word-745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 280 max words - at ../dataset/shuffle-word-280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 690 max words - at ../dataset/shuffle-word-690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 15 samples (1 token repeat) - 160 max words - at ../dataset/shuffle-word-160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 740 max words, 50 samples - at ../dataset/gen-word-740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 145 max words, 50 samples - at ../dataset/gen-word-145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 360 max words - at ../dataset/shuffle-word-360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 835 max words - at ../dataset/shuffle-word-835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 210 max words, 50 samples - at ../dataset/gen-word-210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 585 max words - at ../dataset/shuffle-word-585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 39 samples (1 token repeat) - 65 max words - at ../dataset/shuffle-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 250 max words - at ../dataset/shuffle-word-250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 450 max words, 50 samples - at ../dataset/gen-word-450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 550 max words - at ../dataset/shuffle-word-550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 455 max words, 50 samples - at ../dataset/gen-word-455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 435 max words, 50 samples - at ../dataset/gen-word-435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 760 max words - at ../dataset/shuffle-word-760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 165 max words, 50 samples - at ../dataset/gen-word-165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 14 samples (1 token repeat) - 170 max words - at ../dataset/shuffle-word-170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 15 samples (1 token repeat) - 180 max words - at ../dataset/shuffle-word-180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 75 max words, 50 samples - at ../dataset/gen-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 28 samples (1 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 39 samples (1 token repeat) - 70 max words - at ../dataset/shuffle-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 550 max words, 50 samples - at ../dataset/gen-word-550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 955 max words - at ../dataset/shuffle-word-955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 355 max words, 50 samples - at ../dataset/gen-word-355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 295 max words - at ../dataset/shuffle-word-295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 830 max words - at ../dataset/shuffle-word-830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 770 max words - at ../dataset/shuffle-word-770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 285 max words - at ../dataset/shuffle-word-285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 470 max words, 50 samples - at ../dataset/gen-word-470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 345 max words, 50 samples - at ../dataset/gen-word-345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 355 max words - at ../dataset/shuffle-word-355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 410 max words, 50 samples - at ../dataset/gen-word-410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 535 max words - at ../dataset/shuffle-word-535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 15 samples (1 token repeat) - 165 max words - at ../dataset/shuffle-word-165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 995 max words, 50 samples - at ../dataset/gen-word-995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1100 max words - at ../dataset/shuffle-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 225 max words, 50 samples - at ../dataset/gen-word-225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 245 max words, 50 samples - at ../dataset/gen-word-245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 13 samples (1 token repeat) - 190 max words - at ../dataset/shuffle-word-190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 13 samples (1 token repeat) - 200 max words - at ../dataset/shuffle-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 310 max words - at ../dataset/shuffle-word-310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 230 max words, 50 samples - at ../dataset/gen-word-230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 59 samples (20 token repeat) - 1200 max words - at ../dataset/shuffle-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 390 max words, 50 samples - at ../dataset/gen-word-390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 500 max words - at ../dataset/shuffle-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 685 max words, 50 samples - at ../dataset/gen-word-685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 635 max words, 50 samples - at ../dataset/gen-word-635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 545 max words, 50 samples - at ../dataset/gen-word-545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4200 max words - at ../dataset/shuffle-word-4200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 715 max words - at ../dataset/shuffle-word-715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 625 max words, 50 samples - at ../dataset/gen-word-625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 795 max words - at ../dataset/shuffle-word-795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 705 max words - at ../dataset/shuffle-word-705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 33 samples (1 token repeat) - 80 max words - at ../dataset/shuffle-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 905 max words, 50 samples - at ../dataset/gen-word-905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 780 max words - at ../dataset/shuffle-word-780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 520 max words - at ../dataset/shuffle-word-520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 375 max words, 50 samples - at ../dataset/gen-word-375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 940 max words - at ../dataset/shuffle-word-940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 150 max words, 50 samples - at ../dataset/gen-word-150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 345 max words - at ../dataset/shuffle-word-345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 275 max words, 50 samples - at ../dataset/gen-word-275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 425 max words - at ../dataset/shuffle-word-425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 945 max words - at ../dataset/shuffle-word-945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 815 max words - at ../dataset/shuffle-word-815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 960 max words - at ../dataset/shuffle-word-960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 14 samples (1 token repeat) - 195 max words - at ../dataset/shuffle-word-195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 825 max words - at ../dataset/shuffle-word-825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 980 max words - at ../dataset/shuffle-word-980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 335 max words - at ../dataset/shuffle-word-335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 830 max words, 50 samples - at ../dataset/gen-word-830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 810 max words - at ../dataset/shuffle-word-810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 820 max words - at ../dataset/shuffle-word-820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 990 max words - at ../dataset/shuffle-word-990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 725 max words, 50 samples - at ../dataset/gen-word-725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 975 max words - at ../dataset/shuffle-word-975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 315 max words - at ../dataset/shuffle-word-315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 680 max words - at ../dataset/shuffle-word-680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5400 max words - at ../dataset/shuffle-word-5400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 525 max words, 50 samples - at ../dataset/gen-word-525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7300 max words - at ../dataset/shuffle-word-7300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 170 max words, 50 samples - at ../dataset/gen-word-170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 250 max words, 50 samples - at ../dataset/gen-word-250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 380 max words, 50 samples - at ../dataset/gen-word-380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 740 max words - at ../dataset/shuffle-word-740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 725 max words - at ../dataset/shuffle-word-725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 715 max words, 50 samples - at ../dataset/gen-word-715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 760 max words, 50 samples - at ../dataset/gen-word-760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 510 max words - at ../dataset/shuffle-word-510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 375 max words - at ../dataset/shuffle-word-375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 860 max words - at ../dataset/shuffle-word-860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 330 max words - at ../dataset/shuffle-word-330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 755 max words - at ../dataset/shuffle-word-755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 905 max words - at ../dataset/shuffle-word-905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 915 max words, 50 samples - at ../dataset/gen-word-915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 660 max words - at ../dataset/shuffle-word-660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4000 max words - at ../dataset/shuffle-word-4000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 315 max words, 50 samples - at ../dataset/gen-word-315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 880 max words, 50 samples - at ../dataset/gen-word-880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 195 max words, 50 samples - at ../dataset/gen-word-195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 625 max words - at ../dataset/shuffle-word-625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2100 max words - at ../dataset/shuffle-word-2100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 325 max words, 50 samples - at ../dataset/gen-word-325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 735 max words - at ../dataset/shuffle-word-735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 795 max words, 50 samples - at ../dataset/gen-word-795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 890 max words, 50 samples - at ../dataset/gen-word-890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 685 max words - at ../dataset/shuffle-word-685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 620 max words - at ../dataset/shuffle-word-620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 785 max words - at ../dataset/shuffle-word-785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 275 max words - at ../dataset/shuffle-word-275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 655 max words - at ../dataset/shuffle-word-655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 340 max words, 50 samples - at ../dataset/gen-word-340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 920 max words - at ../dataset/shuffle-word-920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7100 max words - at ../dataset/shuffle-word-7100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 655 max words, 50 samples - at ../dataset/gen-word-655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 720 max words - at ../dataset/shuffle-word-720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 970 max words - at ../dataset/shuffle-word-970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 845 max words, 50 samples - at ../dataset/gen-word-845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3000 max words - at ../dataset/shuffle-word-3000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 430 max words - at ../dataset/shuffle-word-430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 14 samples (1 token repeat) - 175 max words - at ../dataset/shuffle-word-175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 790 max words - at ../dataset/shuffle-word-790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 90 max words, 50 samples - at ../dataset/gen-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 895 max words - at ../dataset/shuffle-word-895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 37 samples (20 token repeat) - 2500 max words - at ../dataset/shuffle-word-2500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 865 max words - at ../dataset/shuffle-word-865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 51 samples (20 token repeat) - 1300 max words - at ../dataset/shuffle-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 660 max words, 50 samples - at ../dataset/gen-word-660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 220 max words, 50 samples - at ../dataset/gen-word-220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 485 max words - at ../dataset/shuffle-word-485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 695 max words, 50 samples - at ../dataset/gen-word-695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 840 max words, 50 samples - at ../dataset/gen-word-840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 745 max words, 50 samples - at ../dataset/gen-word-745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 600 max words, 50 samples - at ../dataset/gen-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 790 max words, 50 samples - at ../dataset/gen-word-790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 540 max words, 50 samples - at ../dataset/gen-word-540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 420 max words, 50 samples - at ../dataset/gen-word-420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 825 max words, 50 samples - at ../dataset/gen-word-825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 805 max words, 50 samples - at ../dataset/gen-word-805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 765 max words, 50 samples - at ../dataset/gen-word-765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 855 max words - at ../dataset/shuffle-word-855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 965 max words, 50 samples - at ../dataset/gen-word-965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5900 max words - at ../dataset/shuffle-word-5900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1700 max words - at ../dataset/shuffle-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 705 max words, 50 samples - at ../dataset/gen-word-705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7900 max words - at ../dataset/shuffle-word-7900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2300 max words - at ../dataset/shuffle-word-2300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 385 max words, 50 samples - at ../dataset/gen-word-385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 990 max words, 50 samples - at ../dataset/gen-word-990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 515 max words, 50 samples - at ../dataset/gen-word-515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 640 max words, 50 samples - at ../dataset/gen-word-640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 730 max words, 50 samples - at ../dataset/gen-word-730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 930 max words, 50 samples - at ../dataset/gen-word-930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4600 max words - at ../dataset/shuffle-word-4600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 590 max words, 50 samples - at ../dataset/gen-word-590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 595 max words, 50 samples - at ../dataset/gen-word-595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3300 max words - at ../dataset/shuffle-word-3300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 820 max words, 50 samples - at ../dataset/gen-word-820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 320 max words, 50 samples - at ../dataset/gen-word-320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5600 max words - at ../dataset/shuffle-word-5600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 750 max words - at ../dataset/shuffle-word-750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5300 max words - at ../dataset/shuffle-word-5300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 300 max words - at ../dataset/shuffle-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 325 max words - at ../dataset/shuffle-word-325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 670 max words, 50 samples - at ../dataset/gen-word-670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 730 max words - at ../dataset/shuffle-word-730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4100 max words - at ../dataset/shuffle-word-4100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 11 samples (1 token repeat) - 205 max words - at ../dataset/shuffle-word-205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 980 max words, 50 samples - at ../dataset/gen-word-980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 935 max words, 50 samples - at ../dataset/gen-word-935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1000 max words, 50 samples - at ../dataset/gen-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1900 max words - at ../dataset/shuffle-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 895 max words, 50 samples - at ../dataset/gen-word-895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 870 max words - at ../dataset/shuffle-word-870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 750 max words, 50 samples - at ../dataset/gen-word-750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 245 max words - at ../dataset/shuffle-word-245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 270 max words - at ../dataset/shuffle-word-270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 380 max words - at ../dataset/shuffle-word-380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 405 max words - at ../dataset/shuffle-word-405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 420 max words - at ../dataset/shuffle-word-420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 300 max words, 50 samples - at ../dataset/gen-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 910 max words, 50 samples - at ../dataset/gen-word-910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 735 max words, 50 samples - at ../dataset/gen-word-735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2000 max words - at ../dataset/shuffle-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 39 samples (20 token repeat) - 2400 max words - at ../dataset/shuffle-word-2400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 8000 max words - at ../dataset/shuffle-word-8000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 645 max words - at ../dataset/shuffle-word-645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 630 max words, 50 samples - at ../dataset/gen-word-630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7400 max words - at ../dataset/shuffle-word-7400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 305 max words, 50 samples - at ../dataset/gen-word-305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 455 max words - at ../dataset/shuffle-word-455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 860 max words, 50 samples - at ../dataset/gen-word-860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7700 max words - at ../dataset/shuffle-word-7700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 185 max words, 50 samples - at ../dataset/gen-word-185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 260 max words, 50 samples - at ../dataset/gen-word-260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 920 max words, 50 samples - at ../dataset/gen-word-920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 665 max words, 50 samples - at ../dataset/gen-word-665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 565 max words, 50 samples - at ../dataset/gen-word-565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 215 max words, 50 samples - at ../dataset/gen-word-215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 540 max words - at ../dataset/shuffle-word-540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 800 max words, 50 samples - at ../dataset/gen-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 190 max words, 50 samples - at ../dataset/gen-word-190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 560 max words - at ../dataset/shuffle-word-560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4700 max words - at ../dataset/shuffle-word-4700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 495 max words, 50 samples - at ../dataset/gen-word-495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 360 max words, 50 samples - at ../dataset/gen-word-360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 950 max words, 50 samples - at ../dataset/gen-word-950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 615 max words - at ../dataset/shuffle-word-615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6300 max words - at ../dataset/shuffle-word-6300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 845 max words - at ../dataset/shuffle-word-845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 505 max words, 50 samples - at ../dataset/gen-word-505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3900 max words - at ../dataset/shuffle-word-3900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 670 max words - at ../dataset/shuffle-word-670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6400 max words - at ../dataset/shuffle-word-6400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 815 max words, 50 samples - at ../dataset/gen-word-815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 440 max words - at ../dataset/shuffle-word-440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1500 max words - at ../dataset/shuffle-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 925 max words - at ../dataset/shuffle-word-925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 465 max words - at ../dataset/shuffle-word-465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 690 max words, 50 samples - at ../dataset/gen-word-690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 395 max words, 50 samples - at ../dataset/gen-word-395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 265 max words, 50 samples - at ../dataset/gen-word-265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 955 max words, 50 samples - at ../dataset/gen-word-955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 885 max words, 50 samples - at ../dataset/gen-word-885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 14 samples (1 token repeat) - 185 max words - at ../dataset/shuffle-word-185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 240 max words, 50 samples - at ../dataset/gen-word-240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6600 max words - at ../dataset/shuffle-word-6600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 440 max words, 50 samples - at ../dataset/gen-word-440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 135 max words, 50 samples - at ../dataset/gen-word-135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 555 max words, 50 samples - at ../dataset/gen-word-555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 605 max words, 50 samples - at ../dataset/gen-word-605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 465 max words, 50 samples - at ../dataset/gen-word-465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 295 max words, 50 samples - at ../dataset/gen-word-295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 390 max words - at ../dataset/shuffle-word-390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 900 max words, 50 samples - at ../dataset/gen-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 975 max words, 50 samples - at ../dataset/gen-word-975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 945 max words, 50 samples - at ../dataset/gen-word-945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 480 max words - at ../dataset/shuffle-word-480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4900 max words - at ../dataset/shuffle-word-4900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5800 max words - at ../dataset/shuffle-word-5800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 970 max words, 50 samples - at ../dataset/gen-word-970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 605 max words - at ../dataset/shuffle-word-605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 650 max words, 50 samples - at ../dataset/gen-word-650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 985 max words - at ../dataset/shuffle-word-985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3800 max words - at ../dataset/shuffle-word-3800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 235 max words - at ../dataset/shuffle-word-235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7000 max words - at ../dataset/shuffle-word-7000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 570 max words - at ../dataset/shuffle-word-570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 260 max words - at ../dataset/shuffle-word-260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 280 max words, 50 samples - at ../dataset/gen-word-280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 530 max words, 50 samples - at ../dataset/gen-word-530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2900 max words - at ../dataset/shuffle-word-2900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 775 max words - at ../dataset/shuffle-word-775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 610 max words, 50 samples - at ../dataset/gen-word-610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 675 max words, 50 samples - at ../dataset/gen-word-675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 350 max words, 50 samples - at ../dataset/gen-word-350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 710 max words, 50 samples - at ../dataset/gen-word-710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 15 samples (1 token repeat) - 150 max words - at ../dataset/shuffle-word-150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 475 max words, 50 samples - at ../dataset/gen-word-475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 575 max words, 50 samples - at ../dataset/gen-word-575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 30 samples (1 token repeat) - 90 max words - at ../dataset/shuffle-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 575 max words - at ../dataset/shuffle-word-575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 805 max words - at ../dataset/shuffle-word-805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 320 max words - at ../dataset/shuffle-word-320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 665 max words - at ../dataset/shuffle-word-665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 960 max words, 50 samples - at ../dataset/gen-word-960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 490 max words, 50 samples - at ../dataset/gen-word-490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 810 max words, 50 samples - at ../dataset/gen-word-810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 695 max words - at ../dataset/shuffle-word-695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 995 max words - at ../dataset/shuffle-word-995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 770 max words, 50 samples - at ../dataset/gen-word-770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 875 max words - at ../dataset/shuffle-word-875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 395 max words - at ../dataset/shuffle-word-395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 41 samples (20 token repeat) - 1400 max words - at ../dataset/shuffle-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 460 max words, 50 samples - at ../dataset/gen-word-460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7600 max words - at ../dataset/shuffle-word-7600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7200 max words - at ../dataset/shuffle-word-7200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4800 max words - at ../dataset/shuffle-word-4800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3600 max words - at ../dataset/shuffle-word-3600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 535 max words, 50 samples - at ../dataset/gen-word-535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 570 max words, 50 samples - at ../dataset/gen-word-570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 700 max words - at ../dataset/shuffle-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 985 max words, 50 samples - at ../dataset/gen-word-985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 510 max words, 50 samples - at ../dataset/gen-word-510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 16 samples (1 token repeat) - 145 max words - at ../dataset/shuffle-word-145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 340 max words - at ../dataset/shuffle-word-340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 515 max words - at ../dataset/shuffle-word-515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 435 max words - at ../dataset/shuffle-word-435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 640 max words - at ../dataset/shuffle-word-640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 775 max words, 50 samples - at ../dataset/gen-word-775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 565 max words - at ../dataset/shuffle-word-565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 415 max words - at ../dataset/shuffle-word-415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 835 max words, 50 samples - at ../dataset/gen-word-835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6700 max words - at ../dataset/shuffle-word-6700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 335 max words, 50 samples - at ../dataset/gen-word-335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 620 max words, 50 samples - at ../dataset/gen-word-620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 870 max words, 50 samples - at ../dataset/gen-word-870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 890 max words - at ../dataset/shuffle-word-890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 460 max words - at ../dataset/shuffle-word-460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 765 max words - at ../dataset/shuffle-word-765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 635 max words - at ../dataset/shuffle-word-635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 500 max words, 50 samples - at ../dataset/gen-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 290 max words - at ../dataset/shuffle-word-290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 720 max words, 50 samples - at ../dataset/gen-word-720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 850 max words - at ../dataset/shuffle-word-850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 1000 max words - at ../dataset/shuffle-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3700 max words - at ../dataset/shuffle-word-3700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 160 max words, 50 samples - at ../dataset/gen-word-160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 505 max words - at ../dataset/shuffle-word-505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 30 samples (20 token repeat) - 2600 max words - at ../dataset/shuffle-word-2600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 910 max words - at ../dataset/shuffle-word-910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 22 samples (20 token repeat) - 2700 max words - at ../dataset/shuffle-word-2700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 700 max words, 50 samples - at ../dataset/gen-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5200 max words - at ../dataset/shuffle-word-5200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 710 max words - at ../dataset/shuffle-word-710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 935 max words - at ../dataset/shuffle-word-935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 885 max words - at ../dataset/shuffle-word-885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3200 max words - at ../dataset/shuffle-word-3200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1800 max words - at ../dataset/shuffle-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 785 max words, 50 samples - at ../dataset/gen-word-785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 475 max words - at ../dataset/shuffle-word-475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2200 max words - at ../dataset/shuffle-word-2200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6000 max words - at ../dataset/shuffle-word-6000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 645 max words, 50 samples - at ../dataset/gen-word-645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 205 max words, 50 samples - at ../dataset/gen-word-205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 650 max words - at ../dataset/shuffle-word-650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 555 max words - at ../dataset/shuffle-word-555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 875 max words, 50 samples - at ../dataset/gen-word-875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 900 max words - at ../dataset/shuffle-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4400 max words - at ../dataset/shuffle-word-4400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 965 max words - at ../dataset/shuffle-word-965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 950 max words - at ../dataset/shuffle-word-950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3500 max words - at ../dataset/shuffle-word-3500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 615 max words, 50 samples - at ../dataset/gen-word-615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5500 max words - at ../dataset/shuffle-word-5500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 675 max words - at ../dataset/shuffle-word-675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 410 max words - at ../dataset/shuffle-word-410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 370 max words - at ../dataset/shuffle-word-370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 385 max words - at ../dataset/shuffle-word-385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 855 max words, 50 samples - at ../dataset/gen-word-855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7500 max words - at ../dataset/shuffle-word-7500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3400 max words - at ../dataset/shuffle-word-3400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1600 max words - at ../dataset/shuffle-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 880 max words - at ../dataset/shuffle-word-880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6800 max words - at ../dataset/shuffle-word-6800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 930 max words - at ../dataset/shuffle-word-930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6200 max words - at ../dataset/shuffle-word-6200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3100 max words - at ../dataset/shuffle-word-3100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 780 max words, 50 samples - at ../dataset/gen-word-780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5000 max words - at ../dataset/shuffle-word-5000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 21 samples (20 token repeat) - 2800 max words - at ../dataset/shuffle-word-2800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6500 max words - at ../dataset/shuffle-word-6500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6900 max words - at ../dataset/shuffle-word-6900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 925 max words, 50 samples - at ../dataset/gen-word-925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 940 max words, 50 samples - at ../dataset/gen-word-940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 680 max words, 50 samples - at ../dataset/gen-word-680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6100 max words - at ../dataset/shuffle-word-6100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 485 max words, 50 samples - at ../dataset/gen-word-485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 840 max words - at ../dataset/shuffle-word-840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 850 max words, 50 samples - at ../dataset/gen-word-850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4300 max words - at ../dataset/shuffle-word-4300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7800 max words - at ../dataset/shuffle-word-7800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 865 max words, 50 samples - at ../dataset/gen-word-865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 915 max words - at ../dataset/shuffle-word-915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5700 max words - at ../dataset/shuffle-word-5700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5100 max words - at ../dataset/shuffle-word-5100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4500 max words - at ../dataset/shuffle-word-4500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1100 max words, 2000 samples - at ../dataset/gen-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1400 max words, 2000 samples - at ../dataset/gen-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1300 max words, 2000 samples - at ../dataset/gen-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1200 max words, 2000 samples - at ../dataset/gen-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1500 max words, 2000 samples - at ../dataset/gen-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1700 max words, 2000 samples - at ../dataset/gen-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1800 max words, 2000 samples - at ../dataset/gen-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1600 max words, 2000 samples - at ../dataset/gen-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1900 max words, 2000 samples - at ../dataset/gen-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2100 max words, 2000 samples - at ../dataset/gen-word-2100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2000 max words, 2000 samples - at ../dataset/gen-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2200 max words, 2000 samples - at ../dataset/gen-word-2200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2300 max words, 2000 samples - at ../dataset/gen-word-2300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2500 max words, 2000 samples - at ../dataset/gen-word-2500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2400 max words, 2000 samples - at ../dataset/gen-word-2400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2600 max words, 2000 samples - at ../dataset/gen-word-2600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2800 max words, 2000 samples - at ../dataset/gen-word-2800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2700 max words, 2000 samples - at ../dataset/gen-word-2700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3300 max words, 2000 samples - at ../dataset/gen-word-3300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3000 max words, 2000 samples - at ../dataset/gen-word-3000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3100 max words, 2000 samples - at ../dataset/gen-word-3100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2900 max words, 2000 samples - at ../dataset/gen-word-2900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3400 max words, 2000 samples - at ../dataset/gen-word-3400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4100 max words, 2000 samples - at ../dataset/gen-word-4100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3200 max words, 2000 samples - at ../dataset/gen-word-3200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3600 max words, 2000 samples - at ../dataset/gen-word-3600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3500 max words, 2000 samples - at ../dataset/gen-word-3500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3800 max words, 2000 samples - at ../dataset/gen-word-3800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4000 max words, 2000 samples - at ../dataset/gen-word-4000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3700 max words, 2000 samples - at ../dataset/gen-word-3700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3900 max words, 2000 samples - at ../dataset/gen-word-3900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4500 max words, 2000 samples - at ../dataset/gen-word-4500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4300 max words, 2000 samples - at ../dataset/gen-word-4300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4400 max words, 2000 samples - at ../dataset/gen-word-4400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5300 max words, 2000 samples - at ../dataset/gen-word-5300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4600 max words, 2000 samples - at ../dataset/gen-word-4600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4800 max words, 2000 samples - at ../dataset/gen-word-4800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4200 max words, 2000 samples - at ../dataset/gen-word-4200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5000 max words, 2000 samples - at ../dataset/gen-word-5000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4700 max words, 2000 samples - at ../dataset/gen-word-4700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5200 max words, 2000 samples - at ../dataset/gen-word-5200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5700 max words, 2000 samples - at ../dataset/gen-word-5700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5600 max words, 2000 samples - at ../dataset/gen-word-5600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5100 max words, 2000 samples - at ../dataset/gen-word-5100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6800 max words, 2000 samples - at ../dataset/gen-word-6800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4900 max words, 2000 samples - at ../dataset/gen-word-4900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5400 max words, 2000 samples - at ../dataset/gen-word-5400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5800 max words, 2000 samples - at ../dataset/gen-word-5800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6900 max words, 2000 samples - at ../dataset/gen-word-6900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6300 max words, 2000 samples - at ../dataset/gen-word-6300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6400 max words, 2000 samples - at ../dataset/gen-word-6400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5900 max words, 2000 samples - at ../dataset/gen-word-5900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6200 max words, 2000 samples - at ../dataset/gen-word-6200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5500 max words, 2000 samples - at ../dataset/gen-word-5500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7800 max words, 2000 samples - at ../dataset/gen-word-7800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6500 max words, 2000 samples - at ../dataset/gen-word-6500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6700 max words, 2000 samples - at ../dataset/gen-word-6700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6100 max words, 2000 samples - at ../dataset/gen-word-6100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7000 max words, 2000 samples - at ../dataset/gen-word-7000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7200 max words, 2000 samples - at ../dataset/gen-word-7200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6000 max words, 2000 samples - at ../dataset/gen-word-6000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6600 max words, 2000 samples - at ../dataset/gen-word-6600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7100 max words, 2000 samples - at ../dataset/gen-word-7100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7400 max words, 2000 samples - at ../dataset/gen-word-7400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7300 max words, 2000 samples - at ../dataset/gen-word-7300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7500 max words, 2000 samples - at ../dataset/gen-word-7500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7900 max words, 2000 samples - at ../dataset/gen-word-7900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 8000 max words, 2000 samples - at ../dataset/gen-word-8000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7700 max words, 2000 samples - at ../dataset/gen-word-7700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7600 max words, 2000 samples - at ../dataset/gen-word-7600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "## Done ##\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "total 6.1G\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  11K Sep  2 06:17 gen-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  56K Sep  2 06:17 gen-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 487K Sep  2 06:17 gen-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  58K Sep  2 06:17 gen-word-105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  58K Sep  2 06:17 gen-word-110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  22M Sep  2 06:17 gen-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  62K Sep  2 06:17 gen-word-115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  63K Sep  2 06:17 gen-word-120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  23M Sep  2 06:17 gen-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  65K Sep  2 06:17 gen-word-125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  68K Sep  2 06:17 gen-word-130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  25M Sep  2 06:17 gen-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  72K Sep  2 06:17 gen-word-135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  73K Sep  2 06:17 gen-word-140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27M Sep  2 06:17 gen-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  76K Sep  2 06:17 gen-word-145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  13K Sep  2 06:17 gen-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  81K Sep  2 06:17 gen-word-150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  29M Sep  2 06:17 gen-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  82K Sep  2 06:17 gen-word-155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  83K Sep  2 06:17 gen-word-160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  31M Sep  2 06:17 gen-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  87K Sep  2 06:17 gen-word-165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  86K Sep  2 06:17 gen-word-170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  33M Sep  2 06:17 gen-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  88K Sep  2 06:17 gen-word-175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  93K Sep  2 06:17 gen-word-180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  35M Sep  2 06:17 gen-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  92K Sep  2 06:17 gen-word-185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  99K Sep  2 06:17 gen-word-190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  37M Sep  2 06:17 gen-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 102K Sep  2 06:17 gen-word-195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  15K Sep  2 06:17 gen-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 103K Sep  2 06:17 gen-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  39M Sep  2 06:17 gen-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 103K Sep  2 06:17 gen-word-205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 108K Sep  2 06:17 gen-word-210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  41M Sep  2 06:17 gen-word-2100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 113K Sep  2 06:17 gen-word-215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 114K Sep  2 06:17 gen-word-220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  42M Sep  2 06:17 gen-word-2200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 116K Sep  2 06:17 gen-word-225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 114K Sep  2 06:17 gen-word-230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  44M Sep  2 06:17 gen-word-2300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 117K Sep  2 06:17 gen-word-235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 124K Sep  2 06:17 gen-word-240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  46M Sep  2 06:17 gen-word-2400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 122K Sep  2 06:17 gen-word-245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  17K Sep  2 06:17 gen-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 125K Sep  2 06:17 gen-word-250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  48M Sep  2 06:17 gen-word-2500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 128K Sep  2 06:17 gen-word-255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 134K Sep  2 06:17 gen-word-260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  50M Sep  2 06:17 gen-word-2600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 135K Sep  2 06:17 gen-word-265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 134K Sep  2 06:17 gen-word-270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  52M Sep  2 06:17 gen-word-2700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 140K Sep  2 06:17 gen-word-275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 143K Sep  2 06:17 gen-word-280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  54M Sep  2 06:17 gen-word-2800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 141K Sep  2 06:17 gen-word-285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 144K Sep  2 06:17 gen-word-290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  56M Sep  2 06:17 gen-word-2900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 150K Sep  2 06:17 gen-word-295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  20K Sep  2 06:17 gen-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 155K Sep  2 06:17 gen-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  58M Sep  2 06:17 gen-word-3000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 155K Sep  2 06:17 gen-word-305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 156K Sep  2 06:17 gen-word-310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  60M Sep  2 06:17 gen-word-3100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 157K Sep  2 06:17 gen-word-315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 159K Sep  2 06:17 gen-word-320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  61M Sep  2 06:17 gen-word-3200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 166K Sep  2 06:17 gen-word-325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 166K Sep  2 06:17 gen-word-330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  63M Sep  2 06:17 gen-word-3300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 165K Sep  2 06:17 gen-word-335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 171K Sep  2 06:17 gen-word-340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  65M Sep  2 06:17 gen-word-3400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 171K Sep  2 06:17 gen-word-345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  21K Sep  2 06:17 gen-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 175K Sep  2 06:17 gen-word-350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  67M Sep  2 06:17 gen-word-3500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 175K Sep  2 06:17 gen-word-355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 180K Sep  2 06:17 gen-word-360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  69M Sep  2 06:17 gen-word-3600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 181K Sep  2 06:17 gen-word-365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 185K Sep  2 06:17 gen-word-370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  71M Sep  2 06:17 gen-word-3700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 190K Sep  2 06:17 gen-word-375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 189K Sep  2 06:17 gen-word-380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  73M Sep  2 06:17 gen-word-3800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 190K Sep  2 06:17 gen-word-385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 195K Sep  2 06:17 gen-word-390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  75M Sep  2 06:17 gen-word-3900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 199K Sep  2 06:17 gen-word-395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  25K Sep  2 06:17 gen-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 200K Sep  2 06:17 gen-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  77M Sep  2 06:17 gen-word-4000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 205K Sep  2 06:17 gen-word-405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 208K Sep  2 06:17 gen-word-410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  79M Sep  2 06:17 gen-word-4100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 206K Sep  2 06:17 gen-word-415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 212K Sep  2 06:17 gen-word-420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  80M Sep  2 06:17 gen-word-4200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 214K Sep  2 06:17 gen-word-425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 214K Sep  2 06:17 gen-word-430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  82M Sep  2 06:17 gen-word-4300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 214K Sep  2 06:17 gen-word-435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 219K Sep  2 06:17 gen-word-440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  84M Sep  2 06:17 gen-word-4400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 222K Sep  2 06:17 gen-word-445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 gen-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 223K Sep  2 06:17 gen-word-450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  86M Sep  2 06:17 gen-word-4500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 226K Sep  2 06:17 gen-word-455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 231K Sep  2 06:17 gen-word-460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  88M Sep  2 06:17 gen-word-4600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 233K Sep  2 06:17 gen-word-465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 230K Sep  2 06:17 gen-word-470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  90M Sep  2 06:17 gen-word-4700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 238K Sep  2 06:17 gen-word-475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 236K Sep  2 06:17 gen-word-480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  92M Sep  2 06:17 gen-word-4800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 241K Sep  2 06:17 gen-word-485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 239K Sep  2 06:17 gen-word-490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  94M Sep  2 06:17 gen-word-4900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 245K Sep  2 06:17 gen-word-495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 7.3K Sep  2 06:17 gen-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  31K Sep  2 06:17 gen-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 248K Sep  2 06:17 gen-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  96M Sep  2 06:17 gen-word-5000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 252K Sep  2 06:17 gen-word-505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 256K Sep  2 06:17 gen-word-510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  98M Sep  2 06:17 gen-word-5100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 261K Sep  2 06:17 gen-word-515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 262K Sep  2 06:17 gen-word-520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  99M Sep  2 06:17 gen-word-5200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 261K Sep  2 06:17 gen-word-525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 262K Sep  2 06:17 gen-word-530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 101M Sep  2 06:17 gen-word-5300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 266K Sep  2 06:17 gen-word-535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 264K Sep  2 06:17 gen-word-540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 103M Sep  2 06:17 gen-word-5400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 273K Sep  2 06:17 gen-word-545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  31K Sep  2 06:17 gen-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 277K Sep  2 06:17 gen-word-550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 105M Sep  2 06:17 gen-word-5500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 278K Sep  2 06:17 gen-word-555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 272K Sep  2 06:17 gen-word-560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 107M Sep  2 06:17 gen-word-5600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 282K Sep  2 06:17 gen-word-565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 283K Sep  2 06:17 gen-word-570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 109M Sep  2 06:17 gen-word-5700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 285K Sep  2 06:17 gen-word-575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 290K Sep  2 06:17 gen-word-580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 111M Sep  2 06:17 gen-word-5800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 286K Sep  2 06:17 gen-word-585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 290K Sep  2 06:17 gen-word-590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 113M Sep  2 06:17 gen-word-5900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 291K Sep  2 06:17 gen-word-595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  35K Sep  2 06:17 gen-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 302K Sep  2 06:17 gen-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 115M Sep  2 06:17 gen-word-6000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 298K Sep  2 06:17 gen-word-605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 304K Sep  2 06:17 gen-word-610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 117M Sep  2 06:17 gen-word-6100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 306K Sep  2 06:17 gen-word-615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 304K Sep  2 06:17 gen-word-620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 118M Sep  2 06:17 gen-word-6200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 306K Sep  2 06:17 gen-word-625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 310K Sep  2 06:17 gen-word-630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 120M Sep  2 06:17 gen-word-6300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 312K Sep  2 06:17 gen-word-635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 317K Sep  2 06:17 gen-word-640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 122M Sep  2 06:17 gen-word-6400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 322K Sep  2 06:17 gen-word-645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  36K Sep  2 06:17 gen-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 322K Sep  2 06:17 gen-word-650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 124M Sep  2 06:17 gen-word-6500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 328K Sep  2 06:17 gen-word-655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 325K Sep  2 06:17 gen-word-660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 126M Sep  2 06:17 gen-word-6600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 329K Sep  2 06:17 gen-word-665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 334K Sep  2 06:17 gen-word-670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 128M Sep  2 06:17 gen-word-6700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 332K Sep  2 06:17 gen-word-675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 333K Sep  2 06:17 gen-word-680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 130M Sep  2 06:17 gen-word-6800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 341K Sep  2 06:17 gen-word-685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 338K Sep  2 06:17 gen-word-690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 132M Sep  2 06:17 gen-word-6900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 340K Sep  2 06:17 gen-word-695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  39K Sep  2 06:17 gen-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 342K Sep  2 06:17 gen-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 134M Sep  2 06:17 gen-word-7000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 351K Sep  2 06:17 gen-word-705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 347K Sep  2 06:17 gen-word-710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 136M Sep  2 06:17 gen-word-7100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 352K Sep  2 06:17 gen-word-715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 355K Sep  2 06:17 gen-word-720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 137M Sep  2 06:17 gen-word-7200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 354K Sep  2 06:17 gen-word-725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 358K Sep  2 06:17 gen-word-730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 139M Sep  2 06:17 gen-word-7300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 372K Sep  2 06:17 gen-word-735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 363K Sep  2 06:17 gen-word-740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 141M Sep  2 06:17 gen-word-7400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 364K Sep  2 06:17 gen-word-745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  42K Sep  2 06:17 gen-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 372K Sep  2 06:17 gen-word-750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 143M Sep  2 06:17 gen-word-7500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 366K Sep  2 06:17 gen-word-755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 376K Sep  2 06:17 gen-word-760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 145M Sep  2 06:17 gen-word-7600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 372K Sep  2 06:17 gen-word-765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 381K Sep  2 06:17 gen-word-770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 147M Sep  2 06:17 gen-word-7700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 387K Sep  2 06:17 gen-word-775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 381K Sep  2 06:17 gen-word-780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 149M Sep  2 06:17 gen-word-7800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 381K Sep  2 06:17 gen-word-785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 390K Sep  2 06:17 gen-word-790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 151M Sep  2 06:17 gen-word-7900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 391K Sep  2 06:17 gen-word-795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  44K Sep  2 06:17 gen-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 391K Sep  2 06:17 gen-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 153M Sep  2 06:17 gen-word-8000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 396K Sep  2 06:17 gen-word-805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 400K Sep  2 06:17 gen-word-810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 398K Sep  2 06:17 gen-word-815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 409K Sep  2 06:17 gen-word-820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 403K Sep  2 06:17 gen-word-825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 409K Sep  2 06:17 gen-word-830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 411K Sep  2 06:17 gen-word-835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 414K Sep  2 06:17 gen-word-840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 413K Sep  2 06:17 gen-word-845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  46K Sep  2 06:17 gen-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 412K Sep  2 06:17 gen-word-850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 418K Sep  2 06:17 gen-word-855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 417K Sep  2 06:17 gen-word-860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 432K Sep  2 06:17 gen-word-865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 429K Sep  2 06:17 gen-word-870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 429K Sep  2 06:17 gen-word-875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 432K Sep  2 06:17 gen-word-880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 437K Sep  2 06:17 gen-word-885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 432K Sep  2 06:17 gen-word-890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 446K Sep  2 06:17 gen-word-895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  49K Sep  2 06:17 gen-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 448K Sep  2 06:17 gen-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 448K Sep  2 06:17 gen-word-905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 441K Sep  2 06:17 gen-word-910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 449K Sep  2 06:17 gen-word-915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 450K Sep  2 06:17 gen-word-920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 460K Sep  2 06:17 gen-word-925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 464K Sep  2 06:17 gen-word-930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 458K Sep  2 06:17 gen-word-935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 465K Sep  2 06:17 gen-word-940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 467K Sep  2 06:17 gen-word-945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  53K Sep  2 06:17 gen-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 466K Sep  2 06:17 gen-word-950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 464K Sep  2 06:17 gen-word-955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 473K Sep  2 06:17 gen-word-960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 474K Sep  2 06:17 gen-word-965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 475K Sep  2 06:17 gen-word-970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 477K Sep  2 06:17 gen-word-975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 483K Sep  2 06:17 gen-word-980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 485K Sep  2 06:17 gen-word-985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 486K Sep  2 06:17 gen-word-990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 486K Sep  2 06:17 gen-word-995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  52K Sep  2 06:17 shuffle-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 523K Sep  2 06:17 shuffle-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 523K Sep  2 06:17 shuffle-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  29K Sep  2 06:17 shuffle-word-130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 519K Sep  2 06:17 shuffle-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 518K Sep  2 06:17 shuffle-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  45K Sep  2 06:17 shuffle-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  29K Sep  2 06:17 shuffle-word-150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 520K Sep  2 06:17 shuffle-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  29K Sep  2 06:17 shuffle-word-160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 520K Sep  2 06:17 shuffle-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 522K Sep  2 06:17 shuffle-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 522K Sep  2 06:17 shuffle-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  29K Sep  2 06:17 shuffle-word-185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 520K Sep  2 06:17 shuffle-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  40K Sep  2 06:17 shuffle-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 521K Sep  2 06:17 shuffle-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 522K Sep  2 06:17 shuffle-word-2100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 520K Sep  2 06:17 shuffle-word-2200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 520K Sep  2 06:17 shuffle-word-2300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 522K Sep  2 06:17 shuffle-word-2400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  35K Sep  2 06:17 shuffle-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 518K Sep  2 06:17 shuffle-word-2500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 516K Sep  2 06:17 shuffle-word-2600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 510K Sep  2 06:17 shuffle-word-2700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-2800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-2900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  34K Sep  2 06:17 shuffle-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-3000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-3100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-3200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep  2 06:17 shuffle-word-3300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-3400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  33K Sep  2 06:17 shuffle-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-3500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-3600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-3700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-3800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-3900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  32K Sep  2 06:17 shuffle-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-4000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-4100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-4200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-4300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-4400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  33K Sep  2 06:17 shuffle-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-4500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep  2 06:17 shuffle-word-4600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-4700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-4800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-4900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  85K Sep  2 06:17 shuffle-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  32K Sep  2 06:17 shuffle-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep  2 06:17 shuffle-word-5000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-5100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-5200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-5300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep  2 06:17 shuffle-word-5400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  31K Sep  2 06:17 shuffle-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep  2 06:17 shuffle-word-5500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-5600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-5700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-5800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-5900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  30K Sep  2 06:17 shuffle-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-6000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-6100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-6200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-6300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-6400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  31K Sep  2 06:17 shuffle-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-6500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep  2 06:17 shuffle-word-6600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep  2 06:17 shuffle-word-6700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-6800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-6900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  30K Sep  2 06:17 shuffle-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep  2 06:17 shuffle-word-7000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-7100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-7200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-7300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep  2 06:17 shuffle-word-7400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  29K Sep  2 06:17 shuffle-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-7500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-7600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-7700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-7800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-7900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  29K Sep  2 06:17 shuffle-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-8000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  29K Sep  2 06:17 shuffle-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  29K Sep  2 06:17 shuffle-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 6.0K Sep  2 06:17 word-2-count.jsonl\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%script bash\n",
+    "\n",
+    "########################################\n",
+    "# Generate the required jsonl dataset\n",
+    "########################################\n",
+    "\n",
+    "# Reset the dataset dir\n",
+    "mkdir -p ../dataset\n",
+    "rm -rf ../dataset/*.jsonl\n",
+    "\n",
+    "# Generate the various datasets\n",
+    "echo \"## Generating word reptition dataset ##\"\n",
+    "\n",
+    "#\n",
+    "# We reduce the training set for the smaller word counts (2, and 5 - 1000 in steps of 5)\n",
+    "# and shift the focus upwards, towards the larger word counts generated below\n",
+    "#\n",
+    "python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-2-count.jsonl 2 50 &\n",
+    "for i in {5..1000..5} \n",
+    "do\n",
+    "    python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 50 & \n",
+    "    python ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 1 & \n",
+    "done\n",
+    "\n",
+    "#\n",
+    "# Ramping up the 1100 - 8000 words dataset (in steps of 100)\n",
+    "# \n",
+    "for i in {1100..8000..100} \n",
+    "do\n",
+    "    python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 2000 & \n",
+    "    python ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 20 & \n",
+    "done\n",
+    "\n",
+    "wait\n",
+    "echo \"## Done ##\"\n",
+    "\n",
+    "ls -lh ../dataset/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "2eb840b5",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-09-02T06:17:49.255288Z",
+     "iopub.status.busy": "2023-09-02T06:17:49.254615Z"
+    },
+    "papermill": {
+     "duration": null,
+     "end_time": null,
+     "exception": false,
+     "start_time": "2023-09-02T06:17:49.184772",
+     "status": "running"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.11/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5-L96-D1024-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-L96-D1024-E0_1-mem-ctx-8k/', '--model.lr_init=3e-4', '--model.lr_final=1e-4', '--data.max_token_size=8192', '--model.ctx_len=4096', '--model.bptt_learning_range=2', '--model.load_model=../model/v5-L96-D1024-E0_1-mem-ctx-4k.pth'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5-L96-D1024-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-L96-D1024-E0_1-mem-ctx-8k/', '--model.lr_init=3e-4', '--model.lr_final=1e-4', '--data.max_token_size=8192', '--model.ctx_len=4096', '--model.bptt_learning_range=2', '--model.load_model=../model/v5-L96-D1024-E0_1-mem-ctx-4k.pth'].\r\n",
+      "  rank_zero_warn(\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.11/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 4016710040\r\n",
+      "  rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n",
+      "Global seed set to 4016710040\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.9\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20230902_061755-f2kzi2o7\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mv5-L96-D1024-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/f2kzi2o7\u001b[0m\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.11/dist-packages/lightning/fabric/connector.py:562: UserWarning: bf16 is supported for historical reasons but its usage is discouraged. Please set your precision to bf16-mixed instead!\r\n",
+      "  rank_zero_warn(\r\n",
+      "GPU available: True (cuda), used: True\r\n",
+      "TPU available: False, using: 0 TPU cores\r\n",
+      "IPU available: False, using: 0 IPUs\r\n",
+      "HPU available: False, using: 0 HPUs\r\n",
+      "\r\n",
+      "\r\n",
+      "[RWKV.Trainer] Applying 'target_batch_size' with the following:\r\n",
+      "   - target_batch_size:       256\r\n",
+      "   - num_nodes:               1\r\n",
+      "   - num_devices:             8\r\n",
+      "   - accumulate_grad_batches: 32\r\n",
+      "   - effective_batch_size:    256\r\n",
+      "\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Resolving data files:   0%|                             | 0/541 [00:00<?, ?it/s]\r",
+      "Resolving data files: 100%|███████████████| 541/541 [00:00<00:00, 219217.32it/s]\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Downloading and preparing dataset json/default to /actions-runner/.cache/huggingface/datasets/json/default-5b99ac9e8f2083f3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96...\r\n",
+      "\r",
+      "Downloading data files:   0%|                             | 0/1 [00:00<?, ?it/s]\r",
+      "Downloading data files: 100%|████████████████████| 1/1 [00:00<00:00, 144.05it/s]\r\n",
+      "\r",
+      "Extracting data files:   0%|                              | 0/1 [00:00<?, ?it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Extracting data files: 100%|██████████████████████| 1/1 [00:00<00:00,  6.11it/s]\r",
+      "Extracting data files: 100%|██████████████████████| 1/1 [00:00<00:00,  6.09it/s]\r\n",
+      "\r",
+      "Generating train split: 0 examples [00:00, ? examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\r\n",
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\r\n",
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[rank: 1] Global seed set to 4016710040\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[rank: 3] Global seed set to 4016710040\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[rank: 4] Global seed set to 4016710040\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[rank: 6] Global seed set to 4016710040\r\n",
+      "[rank: 7] Global seed set to 4016710040\r\n",
+      "[rank: 2] Global seed set to 4016710040\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[rank: 5] Global seed set to 4016710040\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 1196 examples [00:10, 111.12 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 10026 examples [00:10, 1263.89 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 21237 examples [00:11, 3282.25 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 30015 examples [00:11, 5412.46 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 38572 examples [00:11, 8118.18 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 46262 examples [00:11, 11051.07 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 52932 examples [00:11, 14163.28 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 59424 examples [00:11, 17679.28 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 66940 examples [00:11, 22906.06 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 72972 examples [00:11, 24826.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 78377 examples [00:12, 27244.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 83241 examples [00:12, 27947.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 87440 examples [00:12, 27873.38 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 91429 examples [00:12, 26415.39 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 94971 examples [00:12, 27319.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 98304 examples [00:12, 28128.14 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 101778 examples [00:12, 26589.06 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 104707 examples [00:13, 26921.37 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 108050 examples [00:13, 28043.81 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 146873 examples [00:13, 118860.78 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                                                                     \r"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Dataset json downloaded and prepared to /actions-runner/.cache/huggingface/datasets/json/default-5b99ac9e8f2083f3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96. Subsequent calls will reuse this data.\r\n",
+      "\r",
+      "  0%|                                                     | 0/1 [00:00<?, ?it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 24.72it/s]\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   0%|                      | 0/154911 [00:00<?, ? examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   0%|           | 10/154911 [00:01<7:49:21,  5.50 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   0%|            | 100/154911 [00:02<43:28, 59.35 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   0%|            | 118/154911 [00:02<37:56, 68.00 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   0%|            | 153/154911 [00:02<28:05, 91.81 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   0%|            | 174/154911 [00:02<26:58, 95.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   0%|            | 191/154911 [00:02<27:39, 93.23 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   0%|           | 322/154911 [00:03<10:33, 243.84 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   0%|           | 430/154911 [00:03<07:23, 348.14 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   0%|           | 476/154911 [00:03<08:57, 287.56 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   0%|           | 517/154911 [00:03<10:29, 245.15 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   0%|           | 572/154911 [00:03<08:49, 291.28 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   0%|           | 610/154911 [00:04<10:08, 253.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   0%|           | 642/154911 [00:04<12:53, 199.45 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   0%|           | 700/154911 [00:04<11:24, 225.35 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   0%|           | 742/154911 [00:04<11:33, 222.26 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   1%|           | 788/154911 [00:04<09:49, 261.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   1%|           | 830/154911 [00:04<09:27, 271.47 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   1%|           | 862/154911 [00:05<11:52, 216.15 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   1%|           | 945/154911 [00:05<07:58, 321.97 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   1%|           | 986/154911 [00:05<09:29, 270.36 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   1%|          | 1193/154911 [00:05<06:29, 394.66 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   1%|          | 1409/154911 [00:06<03:56, 647.89 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   1%|          | 1497/154911 [00:06<04:48, 531.08 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   1%|          | 1568/154911 [00:06<05:41, 448.40 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   1%|          | 1634/154911 [00:06<05:34, 458.24 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   1%|          | 1693/154911 [00:06<05:33, 458.93 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   1%|          | 1796/154911 [00:07<06:13, 410.37 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   1%|▏         | 2003/154911 [00:07<03:43, 684.27 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   1%|▏         | 2100/154911 [00:07<04:43, 539.55 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   1%|▏         | 2241/154911 [00:07<05:07, 495.71 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   2%|▏         | 2345/154911 [00:08<06:44, 377.49 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   2%|▏         | 2515/154911 [00:08<05:47, 438.79 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   2%|▏         | 2636/154911 [00:08<04:48, 527.20 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   2%|▏         | 2708/154911 [00:09<06:10, 410.71 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   2%|▏         | 2818/154911 [00:09<05:06, 496.44 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   2%|▏         | 2888/154911 [00:09<07:00, 361.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   2%|▏         | 3048/154911 [00:09<06:47, 372.42 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   2%|▏         | 3098/154911 [00:10<07:07, 354.94 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   2%|▏         | 3151/154911 [00:10<07:47, 324.72 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   2%|▏         | 3341/154911 [00:10<06:20, 398.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   2%|▏         | 3435/154911 [00:10<06:49, 369.76 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   2%|▏         | 3474/154911 [00:11<08:46, 287.71 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   2%|▏         | 3564/154911 [00:11<07:32, 334.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   2%|▏         | 3770/154911 [00:11<06:35, 381.82 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   3%|▎         | 4110/154911 [00:12<03:28, 721.86 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   3%|▎         | 4227/154911 [00:12<04:13, 595.18 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   3%|▎         | 4327/154911 [00:12<05:01, 498.77 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   3%|▎         | 4660/154911 [00:12<03:18, 758.05 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   3%|▎         | 4761/154911 [00:13<03:33, 703.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   3%|▎         | 4848/154911 [00:13<04:59, 501.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   3%|▎         | 5090/154911 [00:13<03:21, 742.70 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   3%|▎         | 5207/154911 [00:13<04:03, 615.99 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   3%|▎         | 5302/154911 [00:14<04:09, 599.93 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   3%|▎         | 5384/154911 [00:14<04:19, 575.66 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   4%|▎         | 5458/154911 [00:14<05:18, 469.89 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   4%|▎         | 5551/154911 [00:14<04:36, 539.22 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   4%|▎         | 5699/154911 [00:14<04:09, 596.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   4%|▍         | 5831/154911 [00:15<04:20, 572.90 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   4%|▍         | 5979/154911 [00:15<03:59, 621.94 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   4%|▍         | 6097/154911 [00:15<04:36, 538.34 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   4%|▍         | 6233/154911 [00:15<03:58, 622.67 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   4%|▍         | 6343/154911 [00:15<03:58, 623.28 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   4%|▍         | 6427/154911 [00:16<03:59, 621.07 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   4%|▍         | 6493/154911 [00:16<04:16, 578.32 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   4%|▍         | 6554/154911 [00:16<06:00, 411.21 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   4%|▍         | 6636/154911 [00:16<05:55, 417.09 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   4%|▍         | 6825/154911 [00:16<03:50, 641.54 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   4%|▍         | 6925/154911 [00:17<04:30, 547.03 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   5%|▍         | 7052/154911 [00:17<04:55, 501.11 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   5%|▍         | 7206/154911 [00:17<04:54, 501.89 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   5%|▍         | 7404/154911 [00:17<03:34, 686.35 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   5%|▍         | 7648/154911 [00:17<02:32, 964.56 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   5%|▍        | 7808/154911 [00:18<02:19, 1051.88 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   5%|▍        | 7959/154911 [00:18<02:10, 1127.60 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   5%|▍        | 8160/154911 [00:18<01:51, 1315.33 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   5%|▍        | 8320/154911 [00:18<02:05, 1167.72 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   5%|▍        | 8454/154911 [00:18<02:26, 1001.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   6%|▌         | 8569/154911 [00:18<02:56, 826.88 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   6%|▌         | 8665/154911 [00:18<03:08, 776.66 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   6%|▌         | 8754/154911 [00:19<03:16, 743.55 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   6%|▌         | 8834/154911 [00:19<03:22, 722.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   6%|▌         | 8910/154911 [00:19<03:26, 708.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   6%|▌         | 8987/154911 [00:19<03:23, 716.95 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   6%|▌         | 9061/154911 [00:19<03:37, 669.95 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   6%|▌         | 9130/154911 [00:19<03:57, 613.97 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   6%|▌         | 9193/154911 [00:19<04:06, 592.13 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   6%|▌         | 9253/154911 [00:19<04:29, 540.38 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   6%|▌         | 9308/154911 [00:20<04:41, 518.04 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   6%|▌         | 9392/154911 [00:20<04:03, 596.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   6%|▌         | 9454/154911 [00:20<04:05, 593.58 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   6%|▌         | 9517/154911 [00:20<04:02, 600.74 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   6%|▌         | 9578/154911 [00:20<04:06, 590.18 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   6%|▌         | 9638/154911 [00:20<04:20, 557.13 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   6%|▋         | 9695/154911 [00:20<04:26, 544.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   6%|▋         | 9751/154911 [00:20<04:32, 533.35 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   6%|▋         | 9809/154911 [00:20<04:25, 545.77 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   6%|▋         | 9869/154911 [00:21<04:18, 560.76 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   6%|▋         | 9944/154911 [00:21<04:19, 558.23 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   6%|▌        | 10069/154911 [00:21<03:16, 737.43 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   7%|▌        | 10145/154911 [00:21<03:18, 729.49 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   7%|▌        | 10219/154911 [00:21<03:20, 722.83 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   7%|▌        | 10296/154911 [00:21<03:26, 699.02 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   7%|▌        | 10367/154911 [00:21<03:32, 679.72 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   7%|▌        | 10438/154911 [00:21<03:31, 683.93 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   7%|▌        | 10513/154911 [00:21<03:26, 700.04 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   7%|▌        | 10594/154911 [00:22<03:17, 729.38 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   7%|▌        | 10677/154911 [00:22<03:10, 758.19 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   7%|▌        | 10754/154911 [00:22<03:16, 732.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   7%|▋        | 10828/154911 [00:22<03:28, 692.47 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   7%|▋        | 10898/154911 [00:22<03:37, 662.86 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   7%|▋        | 10989/154911 [00:22<03:27, 694.02 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   7%|▋        | 11061/154911 [00:22<03:25, 701.08 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   7%|▋        | 11136/154911 [00:22<03:22, 708.74 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   7%|▋        | 11208/154911 [00:22<03:37, 660.70 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   7%|▋        | 11275/154911 [00:23<03:51, 620.05 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   7%|▋        | 11338/154911 [00:23<03:58, 600.86 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   7%|▋        | 11404/154911 [00:23<03:53, 613.82 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   7%|▋        | 11466/154911 [00:23<04:08, 578.11 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   7%|▋        | 11525/154911 [00:23<04:18, 554.87 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   7%|▋        | 11583/154911 [00:23<04:28, 533.31 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   8%|▋        | 11653/154911 [00:23<04:14, 563.32 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   8%|▋        | 11721/154911 [00:23<04:00, 594.83 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   8%|▋        | 11782/154911 [00:23<03:59, 597.48 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   8%|▋        | 11855/154911 [00:24<03:52, 616.26 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   8%|▋        | 11923/154911 [00:24<03:49, 622.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   8%|▋        | 12019/154911 [00:24<03:25, 694.15 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   8%|▋        | 12104/154911 [00:24<03:14, 735.77 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   8%|▋        | 12179/154911 [00:24<03:22, 703.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   8%|▋        | 12250/154911 [00:24<03:28, 685.05 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   8%|▋        | 12334/154911 [00:24<03:16, 726.47 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   8%|▋        | 12415/154911 [00:24<03:16, 724.06 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   8%|▋        | 12566/154911 [00:24<02:35, 913.45 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   8%|▋       | 12719/154911 [00:25<02:12, 1075.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   8%|▋       | 12873/154911 [00:25<02:00, 1176.63 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   8%|▊        | 12992/154911 [00:25<02:23, 986.45 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   8%|▊        | 13096/154911 [00:25<02:37, 903.12 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   9%|▊        | 13191/154911 [00:25<02:44, 859.34 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   9%|▊        | 13280/154911 [00:25<02:59, 786.91 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   9%|▊        | 13362/154911 [00:25<03:15, 723.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   9%|▊        | 13444/154911 [00:26<03:22, 699.38 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   9%|▊        | 13519/154911 [00:26<03:27, 681.54 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   9%|▊        | 13590/154911 [00:26<03:25, 687.30 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   9%|▊        | 13680/154911 [00:26<03:11, 737.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   9%|▊        | 13762/154911 [00:26<03:05, 759.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   9%|▊        | 13839/154911 [00:26<03:08, 750.35 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   9%|▊        | 13915/154911 [00:26<03:11, 734.37 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   9%|▊        | 13989/154911 [00:26<03:11, 734.29 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   9%|▊        | 14090/154911 [00:26<02:53, 811.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   9%|▊        | 14185/154911 [00:26<02:46, 847.70 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   9%|▊        | 14273/154911 [00:27<02:50, 824.74 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   9%|▊        | 14357/154911 [00:27<02:53, 811.79 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   9%|▊        | 14442/154911 [00:27<02:50, 822.19 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   9%|▊        | 14527/154911 [00:27<03:43, 628.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):   9%|▊        | 14668/154911 [00:27<03:01, 772.70 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  10%|▊        | 14772/154911 [00:27<02:48, 829.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  10%|▊        | 14872/154911 [00:27<02:40, 871.39 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  10%|▊        | 14997/154911 [00:27<02:24, 969.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  10%|▊       | 15107/154911 [00:28<02:19, 1005.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  10%|▉        | 15216/154911 [00:28<02:35, 895.95 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  10%|▉        | 15317/154911 [00:28<02:41, 863.44 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  10%|▉        | 15407/154911 [00:28<02:50, 819.02 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  10%|▉        | 15493/154911 [00:28<02:49, 822.93 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  10%|▉        | 15577/154911 [00:28<02:58, 781.19 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  10%|▉        | 15667/154911 [00:28<02:51, 811.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  10%|▉        | 15762/154911 [00:28<02:44, 846.60 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  10%|▉        | 15849/154911 [00:28<02:58, 777.22 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  10%|▉        | 15929/154911 [00:29<03:08, 735.45 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  10%|▉        | 16014/154911 [00:29<03:03, 757.22 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  10%|▉        | 16091/154911 [00:29<03:13, 717.43 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  10%|▉        | 16166/154911 [00:29<03:33, 650.04 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  10%|▉        | 16233/154911 [00:29<03:31, 655.07 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  11%|▉        | 16300/154911 [00:29<03:32, 651.66 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  11%|▉        | 16366/154911 [00:29<03:43, 621.16 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  11%|▉        | 16445/154911 [00:29<03:34, 645.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  11%|▉        | 16531/154911 [00:30<03:16, 704.23 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  11%|▉        | 16623/154911 [00:30<03:02, 757.98 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  11%|▉        | 16700/154911 [00:30<03:09, 727.74 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  11%|▉        | 16774/154911 [00:30<03:15, 707.23 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  11%|▉        | 16846/154911 [00:30<03:17, 699.38 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  11%|▉        | 16917/154911 [00:30<03:29, 659.26 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  11%|▉        | 16992/154911 [00:30<03:21, 684.08 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  11%|▉        | 17080/154911 [00:30<03:07, 735.76 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  11%|▉        | 17158/154911 [00:30<03:11, 719.08 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  11%|█        | 17261/154911 [00:30<02:52, 796.23 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  11%|█        | 17342/154911 [00:31<03:00, 763.43 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  11%|█        | 17419/154911 [00:31<03:15, 703.86 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  11%|█        | 17491/154911 [00:31<03:27, 663.72 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  11%|█        | 17559/154911 [00:31<03:34, 640.56 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  11%|█        | 17633/154911 [00:31<03:31, 649.96 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  11%|█        | 17708/154911 [00:31<03:23, 673.50 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  11%|█        | 17776/154911 [00:31<03:31, 647.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  12%|█        | 17863/154911 [00:31<03:13, 707.99 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  12%|█        | 17937/154911 [00:32<03:11, 713.96 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  12%|█        | 18010/154911 [00:32<03:16, 695.23 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  12%|█        | 18091/154911 [00:32<03:08, 724.93 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  12%|█        | 18167/154911 [00:32<03:32, 644.44 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  12%|█        | 18264/154911 [00:32<03:08, 725.86 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  12%|█        | 18340/154911 [00:32<03:11, 712.47 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  12%|█        | 18449/154911 [00:32<02:47, 812.81 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  12%|█        | 18537/154911 [00:32<02:44, 831.43 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  12%|█        | 18624/154911 [00:32<02:46, 818.14 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  12%|█        | 18708/154911 [00:33<02:50, 798.84 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  12%|█        | 18789/154911 [00:33<03:02, 746.24 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  12%|█        | 18870/154911 [00:33<03:03, 741.98 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  12%|█        | 18946/154911 [00:33<03:20, 677.27 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  12%|█        | 19022/154911 [00:33<03:14, 698.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  12%|█        | 19094/154911 [00:33<03:23, 666.71 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  12%|█        | 19163/154911 [00:33<03:33, 636.80 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  12%|█        | 19235/154911 [00:33<03:30, 643.57 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  12%|█        | 19304/154911 [00:33<03:28, 649.32 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  13%|█▏       | 19372/154911 [00:34<03:26, 656.08 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  13%|█▏       | 19438/154911 [00:34<03:30, 644.16 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  13%|█▏       | 19508/154911 [00:34<03:25, 659.91 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  13%|█▏       | 19589/154911 [00:34<03:12, 702.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  13%|█▏       | 19660/154911 [00:34<03:15, 692.49 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  13%|█▏       | 19740/154911 [00:34<03:07, 720.43 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  13%|█▏       | 19814/154911 [00:34<03:11, 705.30 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  13%|█▏       | 19885/154911 [00:34<03:20, 672.29 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  13%|█▏       | 19955/154911 [00:34<03:18, 678.36 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  13%|█▏       | 20029/154911 [00:34<03:13, 695.34 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  13%|█▏       | 20102/154911 [00:35<03:12, 701.83 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  13%|█▏       | 20173/154911 [00:35<03:22, 664.81 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  13%|█▏       | 20249/154911 [00:35<03:15, 689.98 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  13%|█▏       | 20360/154911 [00:35<02:46, 808.88 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  13%|█▏       | 20467/154911 [00:35<02:32, 883.00 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  13%|█▏       | 20562/154911 [00:35<02:31, 887.48 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  13%|█       | 20693/154911 [00:35<02:13, 1005.35 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  13%|█▏       | 20795/154911 [00:35<02:43, 822.58 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  13%|█▏       | 20885/154911 [00:36<02:47, 801.43 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  14%|█▏       | 20976/154911 [00:36<02:42, 823.32 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  14%|█       | 21152/154911 [00:36<02:06, 1056.56 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  14%|█       | 21300/154911 [00:36<01:59, 1118.88 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  14%|█       | 21425/154911 [00:36<01:55, 1152.48 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  14%|█       | 21543/154911 [00:36<02:06, 1056.46 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  14%|█       | 21659/154911 [00:36<02:03, 1082.36 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  14%|█▏      | 21815/154911 [00:36<01:49, 1212.79 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  14%|█▏      | 21943/154911 [00:36<01:54, 1161.58 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  14%|█▏      | 22080/154911 [00:37<01:49, 1217.23 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  14%|█▏      | 22204/154911 [00:37<02:11, 1012.54 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  14%|█▎       | 22313/154911 [00:37<02:20, 946.17 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  14%|█▎       | 22414/154911 [00:37<02:37, 839.57 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  15%|█▎       | 22503/154911 [00:37<02:38, 833.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  15%|█▎       | 22590/154911 [00:37<02:50, 776.49 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  15%|█▎       | 22675/154911 [00:37<02:55, 751.39 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  15%|█▎       | 22752/154911 [00:37<02:57, 744.83 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  15%|█▎       | 22828/154911 [00:38<03:27, 637.22 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  15%|█▎       | 22895/154911 [00:38<04:01, 547.22 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  15%|█▎       | 22957/154911 [00:38<03:54, 563.46 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  15%|█▎       | 23017/154911 [00:38<03:51, 569.27 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  15%|█▎       | 23078/154911 [00:38<03:48, 577.44 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  15%|█▎       | 23138/154911 [00:38<03:59, 550.57 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  15%|█▎       | 23195/154911 [00:38<04:11, 523.44 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  15%|█▎       | 23249/154911 [00:38<04:19, 506.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  15%|█▎       | 23317/154911 [00:39<03:59, 549.71 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  15%|█▎       | 23373/154911 [00:39<03:58, 552.00 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  15%|█▎       | 23429/154911 [00:39<04:21, 502.13 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  15%|█▎       | 23525/154911 [00:39<03:31, 620.71 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  15%|█▎       | 23590/154911 [00:39<03:31, 620.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  15%|█▎       | 23654/154911 [00:39<03:35, 608.42 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  15%|█▍       | 23717/154911 [00:39<03:36, 605.23 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  15%|█▍       | 23783/154911 [00:39<03:33, 613.08 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  15%|█▍       | 23846/154911 [00:39<03:34, 611.60 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  15%|█▍       | 23908/154911 [00:40<03:36, 605.48 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  15%|█▍       | 23973/154911 [00:40<03:33, 612.11 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  16%|█▍       | 24069/154911 [00:40<03:03, 711.84 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  16%|█▍       | 24146/154911 [00:40<02:59, 727.90 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  16%|█▍       | 24225/154911 [00:40<02:59, 727.56 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  16%|█▍       | 24299/154911 [00:40<03:20, 650.62 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  16%|█▍       | 24366/154911 [00:40<03:21, 646.45 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  16%|█▍       | 24432/154911 [00:40<03:20, 649.38 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  16%|█▍       | 24498/154911 [00:40<03:22, 644.93 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  16%|█▍       | 24564/154911 [00:41<03:39, 594.64 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  16%|█▍       | 24627/154911 [00:41<03:35, 603.86 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  16%|█▍       | 24690/154911 [00:41<03:33, 609.43 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  16%|█▍       | 24754/154911 [00:41<03:43, 582.03 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  16%|█▍       | 24813/154911 [00:41<03:43, 581.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  16%|█▍       | 24888/154911 [00:41<03:39, 593.02 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  16%|█▍       | 24958/154911 [00:41<03:29, 619.52 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  16%|█▍       | 25039/154911 [00:41<03:13, 670.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  16%|█▍       | 25110/154911 [00:41<03:15, 664.63 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  16%|█▍       | 25177/154911 [00:41<03:20, 646.91 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  16%|█▍       | 25249/154911 [00:42<03:15, 664.76 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  16%|█▍       | 25326/154911 [00:42<03:06, 694.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  16%|█▍       | 25398/154911 [00:42<03:17, 656.81 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  16%|█▍       | 25470/154911 [00:42<03:12, 672.08 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  16%|█▍       | 25538/154911 [00:42<03:25, 629.62 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  17%|█▍       | 25602/154911 [00:42<03:42, 580.04 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  17%|█▍       | 25668/154911 [00:42<03:35, 598.74 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  17%|█▍       | 25729/154911 [00:42<03:35, 599.98 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  17%|█▍       | 25791/154911 [00:42<03:41, 582.50 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  17%|█▌       | 25850/154911 [00:43<03:50, 559.46 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  17%|█▌       | 25907/154911 [00:43<03:52, 554.95 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  17%|█▌       | 25963/154911 [00:43<03:52, 554.42 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  17%|█▌       | 26025/154911 [00:43<03:47, 565.96 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  17%|█▌       | 26082/154911 [00:43<03:58, 540.30 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  17%|█▌       | 26138/154911 [00:43<03:58, 540.83 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  17%|█▌       | 26193/154911 [00:43<04:31, 473.52 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  17%|█▌       | 26255/154911 [00:43<04:11, 511.52 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  17%|█▌       | 26315/154911 [00:43<04:00, 535.54 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  17%|█▌       | 26377/154911 [00:44<03:50, 558.25 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  17%|█▌       | 26447/154911 [00:44<03:35, 597.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  17%|█▌       | 26513/154911 [00:44<03:28, 614.49 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  17%|█▌       | 26577/154911 [00:44<03:48, 560.41 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  17%|█▌       | 26680/154911 [00:44<03:16, 652.34 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  17%|█▌       | 26746/154911 [00:44<03:16, 652.85 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  17%|█▌       | 26812/154911 [00:44<03:26, 618.96 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  17%|█▌       | 26878/154911 [00:44<03:28, 613.70 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  17%|█▌       | 26940/154911 [00:45<04:20, 491.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  17%|█▌       | 27011/154911 [00:45<03:55, 544.02 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  17%|█▌       | 27070/154911 [00:45<04:07, 515.49 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  18%|█▌       | 27125/154911 [00:45<04:10, 510.29 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  18%|█▌       | 27178/154911 [00:45<04:10, 510.80 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  18%|█▌       | 27231/154911 [00:45<04:18, 494.32 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  18%|█▌       | 27295/154911 [00:45<04:01, 527.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  18%|█▌       | 27361/154911 [00:45<03:55, 541.03 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  18%|█▌       | 27416/154911 [00:45<04:01, 528.23 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  18%|█▌       | 27479/154911 [00:46<03:49, 555.96 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  18%|█▌       | 27548/154911 [00:46<03:34, 593.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  18%|█▌       | 27617/154911 [00:46<03:25, 618.50 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  18%|█▌       | 27682/154911 [00:46<03:24, 622.38 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  18%|█▌       | 27746/154911 [00:46<03:23, 624.35 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  18%|█▌       | 27809/154911 [00:46<03:41, 573.64 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  18%|█▋       | 27993/154911 [00:46<02:17, 920.35 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  18%|█▋       | 28089/154911 [00:46<02:25, 870.37 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  18%|█▋       | 28179/154911 [00:46<02:31, 834.19 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  18%|█▋       | 28285/154911 [00:47<02:21, 892.07 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  18%|█▋       | 28377/154911 [00:47<02:31, 832.93 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  18%|█▋       | 28469/154911 [00:47<02:27, 854.52 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  18%|█▋       | 28557/154911 [00:47<02:34, 818.15 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  19%|█▋       | 28681/154911 [00:47<02:15, 929.80 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  19%|█▋       | 28776/154911 [00:47<02:40, 785.33 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  19%|█▋       | 28860/154911 [00:47<03:07, 672.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  19%|█▋       | 28933/154911 [00:48<04:10, 502.50 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  19%|█▋       | 28995/154911 [00:48<04:01, 520.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  19%|█▋       | 29056/154911 [00:48<04:28, 469.05 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  19%|█▋       | 29109/154911 [00:48<04:32, 462.36 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  19%|█▋       | 29159/154911 [00:48<04:36, 454.77 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  19%|█▋       | 29214/154911 [00:48<04:24, 475.62 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  19%|█▋       | 29264/154911 [00:48<04:26, 471.44 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  19%|█▋       | 29320/154911 [00:48<04:17, 487.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  19%|█▋       | 29381/154911 [00:49<04:01, 519.77 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  19%|█▋       | 29447/154911 [00:49<03:49, 547.29 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  19%|█▋       | 29503/154911 [00:49<03:52, 540.19 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  19%|█▋       | 29574/154911 [00:49<03:33, 587.91 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  19%|█▋       | 29634/154911 [00:49<03:39, 570.04 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  19%|█▋       | 29692/154911 [00:49<03:58, 525.20 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  19%|█▋       | 29746/154911 [00:49<04:07, 506.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  19%|█▋       | 29812/154911 [00:49<03:52, 539.02 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  19%|█▋       | 29896/154911 [00:49<03:21, 621.77 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  19%|█▋       | 29960/154911 [00:50<03:25, 609.41 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  19%|█▋       | 30022/154911 [00:50<03:40, 565.91 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  19%|█▋       | 30085/154911 [00:50<03:46, 550.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  19%|█▊       | 30148/154911 [00:50<03:42, 559.95 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  19%|█▊       | 30205/154911 [00:50<03:48, 546.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  20%|█▊       | 30270/154911 [00:50<03:37, 573.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  20%|█▊       | 30328/154911 [00:50<05:48, 357.30 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  20%|█▊       | 30574/154911 [00:51<02:41, 772.17 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  20%|█▊       | 30678/154911 [00:51<02:36, 794.27 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  20%|█▊       | 30779/154911 [00:51<02:56, 705.17 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  20%|█▊       | 30865/154911 [00:51<03:24, 605.83 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  20%|█▊       | 30941/154911 [00:51<03:32, 582.56 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  20%|█▊       | 31009/154911 [00:51<03:32, 583.21 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  20%|█▊       | 31074/154911 [00:51<03:49, 539.89 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  20%|█▊       | 31132/154911 [00:52<04:07, 500.57 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  20%|█▊       | 31187/154911 [00:52<04:03, 508.39 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  20%|█▊       | 31246/154911 [00:52<03:55, 526.00 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  20%|█▊       | 31301/154911 [00:52<03:56, 523.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  20%|█▊       | 31362/154911 [00:52<03:46, 546.54 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  20%|█▊       | 31439/154911 [00:52<03:26, 597.60 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  20%|█▊       | 31514/154911 [00:52<03:12, 639.38 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  20%|█▊       | 31580/154911 [00:52<03:24, 602.18 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  20%|█▊       | 31643/154911 [00:52<03:27, 592.84 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  20%|█▊       | 31708/154911 [00:53<03:22, 607.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  21%|█▊       | 31770/154911 [00:53<03:46, 543.93 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  21%|█▊       | 31826/154911 [00:53<04:03, 505.76 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  21%|█▊       | 31878/154911 [00:53<04:41, 437.35 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  21%|█▊       | 31940/154911 [00:53<04:15, 481.31 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  21%|█▊       | 31991/154911 [00:53<04:42, 435.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  21%|█▊       | 32037/154911 [00:53<05:04, 403.33 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  21%|█▊       | 32090/154911 [00:53<04:52, 420.58 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  21%|█▊       | 32149/154911 [00:54<04:26, 461.47 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  21%|█▊       | 32202/154911 [00:54<04:17, 476.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  21%|█▊       | 32266/154911 [00:54<03:56, 517.52 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  21%|█▉       | 32320/154911 [00:54<03:59, 510.99 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  21%|█▉       | 32383/154911 [00:54<03:46, 539.93 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  21%|█▉       | 32459/154911 [00:54<03:25, 594.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  21%|█▉       | 32540/154911 [00:54<03:14, 628.47 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  21%|█▉       | 32604/154911 [00:54<03:18, 616.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  21%|█▉       | 32682/154911 [00:54<03:09, 645.34 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  21%|█▉       | 32747/154911 [00:55<03:12, 634.47 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  21%|█▉       | 32873/154911 [00:55<02:31, 807.63 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  21%|█▉       | 32955/154911 [00:55<04:07, 493.04 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  21%|█▉       | 33078/154911 [00:55<03:14, 627.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  21%|█▉       | 33158/154911 [00:55<03:32, 571.94 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  21%|█▉       | 33227/154911 [00:55<03:46, 537.20 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  21%|█▉       | 33289/154911 [00:56<03:57, 513.07 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  22%|█▉       | 33346/154911 [00:56<04:14, 478.11 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  22%|█▉       | 33398/154911 [00:56<04:15, 475.34 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  22%|█▉       | 33450/154911 [00:56<04:10, 485.55 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  22%|█▉       | 33519/154911 [00:56<03:46, 537.04 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  22%|█▉       | 33584/154911 [00:56<03:34, 565.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  22%|█▉       | 33649/154911 [00:56<03:30, 576.12 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  22%|█▉       | 33709/154911 [00:56<03:36, 560.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  22%|█▉       | 33769/154911 [00:56<03:32, 570.41 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  22%|█▉       | 33828/154911 [00:57<03:37, 555.56 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  22%|█▉       | 33896/154911 [00:57<03:25, 589.37 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  22%|█▉       | 33966/154911 [00:57<03:16, 616.02 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  22%|█▉       | 34042/154911 [00:57<03:06, 649.32 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  22%|█▉       | 34109/154911 [00:57<03:17, 612.46 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  22%|█▉       | 34171/154911 [00:57<03:21, 598.16 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  22%|█▉       | 34232/154911 [00:57<03:34, 562.30 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  22%|█▉       | 34301/154911 [00:57<03:23, 591.97 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  22%|█▉       | 34376/154911 [00:57<03:10, 631.22 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  22%|██       | 34493/154911 [00:58<02:34, 777.23 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  22%|██       | 34572/154911 [00:58<02:36, 770.60 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  22%|██       | 34653/154911 [00:58<02:33, 781.58 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  22%|██       | 34732/154911 [00:58<02:44, 730.51 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  22%|██       | 34807/154911 [00:58<03:02, 658.86 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  23%|██       | 34879/154911 [00:58<02:59, 670.03 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  23%|██       | 34949/154911 [00:58<03:10, 629.45 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  23%|██       | 35014/154911 [00:58<03:12, 623.24 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  23%|██       | 35102/154911 [00:58<02:59, 665.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  23%|██       | 35170/154911 [00:59<03:00, 662.86 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  23%|██       | 35252/154911 [00:59<02:51, 696.21 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  23%|██       | 35323/154911 [00:59<02:56, 676.97 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  23%|██       | 35392/154911 [00:59<02:59, 666.99 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  23%|██       | 35469/154911 [00:59<02:53, 690.03 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  23%|██       | 35541/154911 [00:59<02:56, 677.13 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  23%|██       | 35609/154911 [00:59<03:07, 636.72 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  23%|██       | 35674/154911 [00:59<03:12, 620.83 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  23%|██       | 35738/154911 [00:59<03:28, 571.62 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  23%|██       | 35796/154911 [01:00<03:34, 554.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  23%|██       | 35852/154911 [01:00<03:47, 523.84 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  23%|██       | 35907/154911 [01:00<04:26, 445.88 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  23%|██       | 35967/154911 [01:00<04:11, 473.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  23%|██       | 36017/154911 [01:00<04:20, 456.82 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  23%|██       | 36086/154911 [01:00<03:51, 512.44 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  23%|██       | 36148/154911 [01:00<03:40, 539.44 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  23%|██       | 36204/154911 [01:00<03:50, 515.05 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  23%|██       | 36257/154911 [01:01<04:08, 476.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  23%|██       | 36309/154911 [01:01<04:18, 459.28 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  23%|██       | 36356/154911 [01:01<04:23, 449.12 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  23%|██       | 36402/154911 [01:01<04:24, 447.41 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  24%|██       | 36462/154911 [01:01<04:10, 472.15 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  24%|██       | 36550/154911 [01:01<03:23, 581.44 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  24%|██▏      | 36615/154911 [01:01<03:18, 595.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  24%|██▏      | 36696/154911 [01:01<03:00, 654.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  24%|██▏      | 36764/154911 [01:01<03:05, 636.55 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  24%|██▏      | 36829/154911 [01:02<03:23, 581.10 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  24%|██▏      | 36889/154911 [01:02<03:41, 533.77 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  24%|██▏      | 36944/154911 [01:02<03:39, 536.68 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  24%|██▏      | 37007/154911 [01:02<03:30, 559.10 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  24%|██▏      | 37065/154911 [01:02<03:33, 552.83 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  24%|██▏      | 37138/154911 [01:02<03:16, 599.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  24%|██▏      | 37216/154911 [01:02<03:11, 615.76 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  24%|██▏      | 37286/154911 [01:02<03:06, 630.09 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  24%|█��▏      | 37363/154911 [01:02<02:58, 658.09 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  24%|██▏      | 37435/154911 [01:03<02:54, 673.45 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  24%|██▏      | 37503/154911 [01:03<03:00, 649.20 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  24%|██▏      | 37572/154911 [01:03<03:01, 647.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  24%|██▏      | 37637/154911 [01:03<03:13, 606.82 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  24%|██▏      | 37701/154911 [01:03<03:12, 609.30 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  24%|██▏      | 37781/154911 [01:03<02:57, 660.28 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  24%|██▏      | 37852/154911 [01:03<02:54, 671.94 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  24%|██▏      | 37928/154911 [01:03<02:48, 695.47 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  25%|██▏      | 38006/154911 [01:03<02:43, 714.62 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  25%|██▏      | 38079/154911 [01:04<02:58, 655.00 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  25%|██▏      | 38158/154911 [01:04<02:49, 690.63 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  25%|██▏      | 38229/154911 [01:04<02:55, 664.40 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  25%|██▏      | 38298/154911 [01:04<02:54, 669.19 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  25%|██▏      | 38366/154911 [01:04<02:56, 659.15 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  25%|██▏      | 38433/154911 [01:04<04:26, 436.46 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  25%|██▏      | 38566/154911 [01:04<03:08, 616.36 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  25%|██▏      | 38641/154911 [01:04<03:23, 570.54 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  25%|██▏      | 38708/154911 [01:05<03:40, 526.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  25%|██▎      | 38768/154911 [01:05<03:47, 510.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  25%|██▎      | 38825/154911 [01:05<03:59, 485.05 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  25%|██▎      | 38877/154911 [01:05<04:10, 463.44 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  25%|██▎      | 38943/154911 [01:05<03:50, 504.00 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  25%|██▎      | 38996/154911 [01:05<04:04, 474.12 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  25%|██▎      | 39046/154911 [01:05<04:01, 478.81 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  25%|██▎      | 39096/154911 [01:05<04:15, 453.11 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  25%|██▎      | 39161/154911 [01:06<04:00, 481.18 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  25%|██▎      | 39211/154911 [01:06<08:16, 233.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  25%|██▎      | 39334/154911 [01:06<05:01, 383.45 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  25%|██▎      | 39397/154911 [01:06<04:56, 389.08 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  25%|██▎      | 39453/154911 [01:06<04:41, 410.13 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▎      | 39507/154911 [01:07<06:02, 318.35 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▎      | 39595/154911 [01:07<04:36, 416.48 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▎      | 39652/154911 [01:07<04:41, 410.03 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▎      | 39710/154911 [01:07<04:19, 443.54 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▎      | 39763/154911 [01:07<04:18, 445.97 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▎      | 39862/154911 [01:07<03:19, 576.23 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▎      | 39928/154911 [01:07<03:29, 549.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▎      | 39991/154911 [01:08<03:28, 550.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▎      | 40050/154911 [01:08<03:37, 527.44 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▎      | 40106/154911 [01:08<04:00, 477.22 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▎      | 40157/154911 [01:08<04:00, 476.51 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▎      | 40211/154911 [01:08<03:56, 484.22 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▎      | 40267/154911 [01:08<03:56, 484.14 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▎      | 40325/154911 [01:08<03:46, 506.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▎      | 40380/154911 [01:08<03:41, 516.22 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▎      | 40434/154911 [01:09<03:40, 518.81 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▎      | 40487/154911 [01:09<03:55, 486.57 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▎      | 40537/154911 [01:09<03:58, 480.09 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▎      | 40586/154911 [01:09<04:00, 475.90 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▎      | 40634/154911 [01:09<04:15, 446.66 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▎      | 40680/154911 [01:09<04:16, 445.48 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▎      | 40726/154911 [01:09<04:14, 448.28 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▎      | 40772/154911 [01:09<04:25, 429.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▎      | 40818/154911 [01:09<04:26, 428.78 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▍      | 40893/154911 [01:09<03:39, 518.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▍      | 40950/154911 [01:10<03:36, 526.44 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  26%|██▍      | 41021/154911 [01:10<03:18, 573.71 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  27%|██▍      | 41103/154911 [01:10<02:57, 641.50 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  27%|██▍      | 41173/154911 [01:10<02:53, 653.72 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  27%|██▍      | 41239/154911 [01:10<03:22, 561.81 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  27%|██▍      | 41312/154911 [01:10<03:10, 595.68 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  27%|██▍      | 41380/154911 [01:10<03:11, 592.91 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  27%|██▍      | 41441/154911 [01:10<03:15, 581.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  27%|██▍      | 41502/154911 [01:11<03:22, 559.21 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  27%|██▍      | 41560/154911 [01:11<03:25, 550.80 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  27%|██▍      | 41621/154911 [01:11<03:20, 565.86 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  27%|██▍      | 41679/154911 [01:11<03:24, 554.49 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  27%|██▍      | 41742/154911 [01:11<03:19, 566.89 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  27%|██▍      | 41799/154911 [01:11<03:19, 565.66 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  27%|██▍      | 41869/154911 [01:11<03:07, 602.46 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  27%|██▍      | 41930/154911 [01:11<03:20, 562.57 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  27%|██▍      | 42023/154911 [01:11<02:50, 663.37 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  27%|██▍      | 42091/154911 [01:11<02:51, 659.66 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  27%|██▍      | 42158/154911 [01:12<02:50, 660.78 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  27%|██▍      | 42226/154911 [01:12<02:49, 665.09 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  27%|██▍      | 42293/154911 [01:12<03:44, 500.58 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  27%|██▍      | 42401/154911 [01:12<03:02, 615.07 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  27%|██▍      | 42473/154911 [01:12<02:55, 640.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  27%|██▍      | 42542/154911 [01:12<02:57, 633.47 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  28%|██▍      | 42630/154911 [01:12<02:54, 645.20 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  28%|██▍      | 42698/154911 [01:12<02:57, 631.54 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  28%|██▍      | 42768/154911 [01:13<02:52, 649.09 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  28%|██▍      | 42837/154911 [01:13<02:56, 633.24 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  28%|██▍      | 42902/154911 [01:13<03:19, 561.36 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  28%|██▍      | 42960/154911 [01:13<03:24, 546.54 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  28%|██▍      | 43027/154911 [01:13<03:26, 540.84 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  28%|██▌      | 43084/154911 [01:13<03:24, 547.72 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  28%|██▌      | 43141/154911 [01:13<03:21, 553.70 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  28%|██▌      | 43198/154911 [01:13<03:25, 543.05 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  28%|██▌      | 43253/154911 [01:13<03:28, 535.36 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  28%|██▌      | 43310/154911 [01:14<03:25, 543.28 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  28%|██▌      | 43365/154911 [01:14<03:27, 537.05 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  28%|██▌      | 43423/154911 [01:14<03:23, 548.27 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  28%|██▌      | 43478/154911 [01:14<03:39, 508.66 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  28%|██▌      | 43535/154911 [01:14<03:33, 522.74 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  28%|██▌      | 43588/154911 [01:14<03:32, 523.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  28%|██▌      | 43655/154911 [01:14<03:19, 557.19 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  28%|██▌      | 43734/154911 [01:14<02:59, 618.55 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  28%|██▌      | 43797/154911 [01:15<08:46, 210.86 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  28%|██▌      | 43990/154911 [01:15<04:22, 421.99 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  28%|██▌      | 44080/154911 [01:15<04:08, 445.28 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  29%|██▌      | 44159/154911 [01:16<04:24, 418.08 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  29%|██▌      | 44225/154911 [01:16<04:26, 415.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  29%|██▌      | 44284/154911 [01:16<04:37, 399.17 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  29%|██▌      | 44340/154911 [01:16<04:19, 426.47 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  29%|██▌      | 44405/154911 [01:16<03:55, 468.45 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  29%|██▌      | 44461/154911 [01:16<03:59, 461.41 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  29%|██▌      | 44514/154911 [01:16<03:58, 463.62 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  29%|██▌      | 44567/154911 [01:16<03:57, 465.17 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  29%|██▌      | 44640/154911 [01:17<03:27, 530.19 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  29%|██▌      | 44710/154911 [01:17<03:12, 571.43 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  29%|██▌      | 44806/154911 [01:17<02:42, 677.34 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  29%|██▌      | 44901/154911 [01:17<02:26, 751.38 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  29%|██▌      | 44991/154911 [01:17<02:18, 792.90 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  29%|██▌      | 45075/154911 [01:17<02:17, 798.08 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  29%|██▌      | 45157/154911 [01:17<02:49, 648.26 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  29%|██▋      | 45228/154911 [01:17<02:57, 616.56 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  29%|██▋      | 45295/154911 [01:18<03:11, 572.50 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  29%|██▋      | 45357/154911 [01:18<03:24, 534.41 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  29%|██▋      | 45431/154911 [01:18<03:07, 582.89 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  29%|██▋      | 45492/154911 [01:18<03:06, 587.90 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  29%|██▋      | 45563/154911 [01:18<03:01, 602.98 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  29%|██▋      | 45625/154911 [01:18<03:06, 584.71 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  29%|██▋      | 45685/154911 [01:18<03:10, 571.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  30%|██▋      | 45756/154911 [01:18<02:59, 608.77 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  30%|██▋      | 45818/154911 [01:18<03:15, 559.34 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  30%|██▋      | 45876/154911 [01:19<03:16, 556.07 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  30%|██▋      | 45935/154911 [01:19<03:22, 538.51 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  30%|██▋      | 45990/154911 [01:19<03:22, 538.78 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  30%|██▋      | 46075/154911 [01:19<02:55, 621.72 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  30%|██▋      | 46138/154911 [01:19<03:08, 577.00 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  30%|██▋      | 46201/154911 [01:19<03:09, 572.56 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  30%|██▋      | 46285/154911 [01:19<02:50, 636.39 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  30%|██▋      | 46360/154911 [01:19<02:43, 663.94 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  30%|██▋      | 46429/154911 [01:19<02:42, 666.51 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  30%|██▋      | 46497/154911 [01:20<02:51, 631.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  30%|██▋      | 46576/154911 [01:20<02:40, 675.79 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  30%|██▋      | 46645/154911 [01:20<02:42, 667.67 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  30%|██▋      | 46715/154911 [01:20<02:40, 675.28 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  30%|██▋      | 46783/154911 [01:20<02:44, 656.82 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  30%|██▋      | 46850/154911 [01:20<02:46, 649.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  30%|██▋      | 46919/154911 [01:20<02:49, 638.32 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  30%|██▋      | 46984/154911 [01:20<02:48, 639.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  30%|██▋      | 47051/154911 [01:20<02:52, 625.99 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  30%|██▋      | 47119/154911 [01:21<02:48, 640.16 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  30%|██▋      | 47184/154911 [01:21<02:56, 609.38 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  30%|██▋      | 47246/154911 [01:21<02:56, 611.19 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  31%|██▋      | 47333/154911 [01:21<02:37, 683.51 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  31%|██▊      | 47403/154911 [01:21<03:21, 533.45 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  31%|██▊      | 47531/154911 [01:21<02:30, 713.71 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  31%|██▊      | 47612/154911 [01:21<02:36, 686.23 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  31%|██▊      | 47688/154911 [01:21<02:36, 684.94 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  31%|██▊      | 47762/154911 [01:22<02:41, 663.14 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  31%|██▊      | 47835/154911 [01:22<02:37, 680.34 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  31%|██▊      | 47907/154911 [01:22<02:37, 680.29 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  31%|██▊      | 47977/154911 [01:22<02:42, 658.44 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  31%|██▊      | 48044/154911 [01:22<02:50, 625.78 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  31%|██▊      | 48108/154911 [01:22<02:56, 606.78 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  31%|██▊      | 48171/154911 [01:22<02:59, 593.03 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  31%|██▊      | 48234/154911 [01:22<03:18, 536.95 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  31%|██▊      | 48315/154911 [01:22<02:57, 601.87 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  31%|██▊      | 48377/154911 [01:23<03:12, 552.94 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  31%|██▊      | 48434/154911 [01:23<03:17, 539.64 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  31%|██▊      | 48498/154911 [01:23<03:08, 565.49 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  31%|██▊      | 48557/154911 [01:23<03:07, 568.00 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  31%|██▊      | 48617/154911 [01:23<03:04, 575.80 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  31%|██▊      | 48683/154911 [01:23<02:57, 599.03 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  31%|██▊      | 48744/154911 [01:23<03:03, 579.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  32%|██▊      | 48815/154911 [01:23<03:03, 577.32 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  32%|██▊      | 48893/154911 [01:23<02:48, 630.29 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  32%|██▊      | 48957/154911 [01:24<02:49, 624.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  32%|██▊      | 49028/154911 [01:24<03:38, 484.22 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  32%|██▊      | 49126/154911 [01:24<03:09, 558.78 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  32%|██▊      | 49188/154911 [01:24<03:04, 572.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  32%|██▊      | 49249/154911 [01:24<03:13, 546.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  32%|██▊      | 49306/154911 [01:24<03:12, 547.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  32%|██▊      | 49364/154911 [01:24<03:22, 521.04 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  32%|██▊      | 49418/154911 [01:24<03:47, 463.97 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  32%|██▊      | 49467/154911 [01:25<03:53, 452.05 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  32%|██▉      | 49514/154911 [01:25<04:16, 410.90 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  32%|██▉      | 49581/154911 [01:25<03:41, 474.48 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  32%|██▉      | 49634/154911 [01:25<03:37, 483.99 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  32%|██▉      | 49687/154911 [01:25<03:43, 470.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  32%|██▉      | 49753/154911 [01:25<03:42, 471.63 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  32%|██▉      | 49826/154911 [01:25<03:19, 527.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  32%|██▉      | 49880/154911 [01:25<03:25, 510.56 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  32%|██▉      | 49937/154911 [01:26<03:20, 523.58 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  32%|██▉      | 49991/154911 [01:26<03:22, 517.99 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  32%|██▉      | 50063/154911 [01:26<03:03, 571.32 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  32%|██▉      | 50137/154911 [01:26<02:50, 615.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  32%|██▉      | 50208/154911 [01:26<02:44, 637.82 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  32%|██▉      | 50273/154911 [01:26<02:47, 625.34 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  32%|██▉      | 50336/154911 [01:26<02:53, 602.29 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  33%|██▉      | 50430/154911 [01:26<02:29, 696.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  33%|██▉      | 50510/154911 [01:26<02:26, 710.47 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  33%|██▉      | 50631/154911 [01:26<02:04, 837.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  33%|██▉      | 50721/154911 [01:27<02:04, 835.49 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  33%|██▉      | 50805/154911 [01:27<02:10, 795.32 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  33%|██▉      | 50886/154911 [01:27<02:19, 747.35 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  33%|██▉      | 50985/154911 [01:27<02:08, 810.96 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  33%|██▉      | 51068/154911 [01:27<02:37, 658.45 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  33%|██▉      | 51172/154911 [01:27<02:21, 734.52 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  33%|██▉      | 51251/154911 [01:27<02:39, 648.00 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  33%|██▉      | 51321/154911 [01:28<02:49, 609.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  33%|██▉      | 51386/154911 [01:28<03:20, 516.16 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  33%|██▉      | 51443/154911 [01:28<03:17, 522.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  33%|██▉      | 51499/154911 [01:28<03:18, 520.34 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  33%|██▉      | 51555/154911 [01:28<03:18, 521.66 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  33%|██▉      | 51613/154911 [01:28<03:13, 534.64 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  33%|███      | 51681/154911 [01:28<03:00, 571.81 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  33%|███      | 51740/154911 [01:28<03:06, 552.21 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  33%|███      | 51799/154911 [01:28<03:09, 545.49 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  33%|███      | 51858/154911 [01:29<03:04, 557.58 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  34%|███      | 51949/154911 [01:29<02:38, 651.63 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  34%|███      | 52035/154911 [01:29<02:25, 708.77 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  34%|███      | 52133/154911 [01:29<02:20, 731.12 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  34%|███      | 52233/154911 [01:29<02:08, 799.47 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  34%|███      | 52314/154911 [01:29<02:14, 764.31 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  34%|███      | 52394/154911 [01:29<02:18, 738.36 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  34%|███      | 52469/154911 [01:29<02:30, 679.11 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  34%|███      | 52539/154911 [01:29<02:40, 636.66 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  34%|███      | 52604/154911 [01:30<02:52, 591.46 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  34%|███      | 52667/154911 [01:30<03:03, 558.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  34%|███      | 52724/154911 [01:30<03:08, 541.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  34%|███      | 52787/154911 [01:30<03:01, 563.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  34%|███      | 52844/154911 [01:30<03:13, 527.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  34%|█���█      | 52898/154911 [01:30<03:15, 522.47 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  34%|███      | 52951/154911 [01:30<03:20, 508.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  34%|███      | 53003/154911 [01:30<03:33, 476.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  34%|███      | 53060/154911 [01:31<03:24, 497.85 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  34%|███      | 53112/154911 [01:31<03:26, 493.36 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  34%|███      | 53191/154911 [01:31<02:57, 572.54 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  34%|███      | 53250/154911 [01:31<03:08, 538.84 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  34%|███      | 53315/154911 [01:31<02:59, 564.77 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  34%|███      | 53383/154911 [01:31<02:51, 593.37 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  34%|███      | 53444/154911 [01:31<02:55, 577.20 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  35%|███      | 53507/154911 [01:31<02:54, 580.36 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  35%|███      | 53572/154911 [01:31<02:56, 573.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  35%|███      | 53643/154911 [01:32<02:46, 609.21 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  35%|███      | 53706/154911 [01:32<02:44, 614.50 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  35%|███      | 53768/154911 [01:32<03:00, 560.11 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  35%|███▏     | 53826/154911 [01:32<03:30, 480.80 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  35%|███▏     | 53877/154911 [01:32<03:48, 442.46 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  35%|███▏     | 53930/154911 [01:32<03:38, 461.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  35%|███▏     | 53991/154911 [01:32<03:24, 493.91 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  35%|███▏     | 54067/154911 [01:32<02:58, 563.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  35%|███▏     | 54126/154911 [01:32<03:02, 551.78 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  35%|███▏     | 54183/154911 [01:33<03:02, 552.79 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  35%|███▏     | 54241/154911 [01:33<04:02, 415.81 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  35%|███▏     | 54373/154911 [01:33<02:41, 622.81 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  35%|███▏     | 54446/154911 [01:33<03:01, 552.51 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  35%|███▏     | 54510/154911 [01:33<03:03, 545.89 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  35%|███▏     | 54571/154911 [01:33<03:14, 515.37 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  35%|███▏     | 54627/154911 [01:33<03:28, 481.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  35%|███▏     | 54679/154911 [01:34<03:28, 480.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  35%|███▏     | 54730/154911 [01:34<03:31, 473.96 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  35%|███▏     | 54779/154911 [01:34<03:38, 458.16 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  35%|███▏     | 54839/154911 [01:34<03:28, 479.21 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  35%|███▏     | 54892/154911 [01:34<03:29, 477.49 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  35%|███▏     | 54967/154911 [01:34<03:07, 533.46 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  36%|███▏     | 55032/154911 [01:34<02:57, 563.40 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  36%|███▏     | 55089/154911 [01:34<02:58, 560.60 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  36%|███▏     | 55149/154911 [01:34<02:55, 568.89 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  36%|███▏     | 55217/154911 [01:35<02:46, 599.16 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  36%|███▏     | 55294/154911 [01:35<02:35, 641.79 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  36%|███▏     | 55368/154911 [01:35<02:31, 655.99 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  36%|███▏     | 55463/154911 [01:35<02:17, 724.84 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  36%|███▏     | 55549/154911 [01:35<02:10, 763.48 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  36%|███▏     | 55628/154911 [01:35<02:13, 743.76 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  36%|███▏     | 55703/154911 [01:35<02:16, 725.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  36%|███▏     | 55776/154911 [01:35<02:21, 702.60 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  36%|███▏     | 55848/154911 [01:35<02:20, 704.64 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  36%|███▏     | 55919/154911 [01:36<02:34, 639.79 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  36%|███▎     | 55992/154911 [01:36<02:30, 658.89 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  36%|███▎     | 56059/154911 [01:36<02:32, 648.87 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  36%|███▎     | 56130/154911 [01:36<02:29, 659.09 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  36%|███▎     | 56204/154911 [01:36<02:26, 675.20 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  36%|███▎     | 56272/154911 [01:36<02:39, 617.35 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[rank: 7] Global seed set to 4016710040\r\n",
+      "initializing deepspeed distributed: GLOBAL_RANK: 7, MEMBER: 8/8\r\n",
+      "[2023-09-02 06:20:40,878] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented\r\n",
+      "\r",
+      "Map (num_proc=64):  36%|███▎     | 56381/154911 [01:36<02:12, 745.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  36%|███▎     | 56460/154911 [01:36<02:09, 757.51 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  37%|███▎     | 56560/154911 [01:36<01:59, 826.32 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  37%|███▎     | 56685/154911 [01:36<01:44, 943.86 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  37%|███▎     | 56793/154911 [01:37<01:40, 980.57 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  37%|██▉     | 56909/154911 [01:37<01:35, 1031.12 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  37%|██▉     | 57030/154911 [01:37<01:30, 1076.64 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  37%|██▉     | 57141/154911 [01:37<01:31, 1073.11 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  37%|██▉     | 57266/154911 [01:37<01:26, 1122.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[rank: 4] Global seed set to 4016710040\r\n",
+      "initializing deepspeed distributed: GLOBAL_RANK: 4, MEMBER: 5/8\r\n",
+      "[2023-09-02 06:20:41,745] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  37%|██▉     | 57379/154911 [01:37<01:32, 1050.80 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  37%|██▉     | 57486/154911 [01:37<01:35, 1020.68 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  37%|██▉     | 57589/154911 [01:37<01:35, 1015.55 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[rank: 3] Global seed set to 4016710040\r\n",
+      "initializing deepspeed distributed: GLOBAL_RANK: 3, MEMBER: 4/8\r\n",
+      "[2023-09-02 06:20:42,075] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  37%|███▎     | 57692/154911 [01:37<01:38, 983.80 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  37%|██▉     | 57855/154911 [01:38<01:23, 1165.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  37%|██▉     | 57974/154911 [01:38<01:32, 1047.34 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  37%|██▉     | 58083/154911 [01:38<01:33, 1033.62 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  38%|███     | 58194/154911 [01:38<01:32, 1049.49 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  38%|███     | 58301/154911 [01:38<01:31, 1052.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  38%|███     | 58408/154911 [01:38<01:34, 1016.98 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  38%|███     | 58550/154911 [01:38<01:25, 1128.77 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  38%|███     | 58673/154911 [01:38<01:23, 1157.22 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  38%|███     | 58790/154911 [01:38<01:25, 1121.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  38%|███     | 58904/154911 [01:39<01:26, 1111.68 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  38%|███     | 59032/154911 [01:39<01:23, 1149.33 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  38%|███     | 59152/154911 [01:39<01:31, 1043.07 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  38%|███▍     | 59260/154911 [01:39<01:39, 958.04 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  38%|███▍     | 59360/154911 [01:39<01:38, 965.91 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  38%|███▍     | 59459/154911 [01:39<01:45, 904.76 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  38%|███▍     | 59557/154911 [01:39<01:43, 923.02 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  39%|███▍     | 59651/154911 [01:39<01:51, 852.58 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  39%|███▍     | 59738/154911 [01:40<02:19, 681.38 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  39%|███▍     | 59883/154911 [01:40<01:50, 858.40 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  39%|███▍     | 59979/154911 [01:40<01:59, 797.60 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  39%|███▍     | 60095/154911 [01:40<01:47, 885.27 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  39%|███▍     | 60191/154911 [01:40<02:12, 712.70 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  39%|███▌     | 60275/154911 [01:40<02:07, 740.36 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  39%|███▌     | 60357/154911 [01:40<02:19, 675.56 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  39%|███▌     | 60431/154911 [01:40<02:20, 672.76 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  39%|███▌     | 60504/154911 [01:41<02:20, 673.52 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  39%|███▌     | 60588/154911 [01:41<02:12, 711.29 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  39%|███▌     | 60681/154911 [01:41<02:03, 760.87 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  39%|███▌     | 60760/154911 [01:41<02:10, 722.39 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  39%|███▌     | 60835/154911 [01:41<02:22, 660.79 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  39%|███▌     | 60904/154911 [01:41<02:26, 642.47 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  39%|███▌     | 60979/154911 [01:41<02:20, 667.56 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  39%|███▌     | 61048/154911 [01:41<02:21, 663.23 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  39%|███▌     | 61127/154911 [01:41<02:15, 694.45 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  40%|███▌     | 61227/154911 [01:42<02:00, 779.38 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  40%|███▌     | 61308/154911 [01:42<02:16, 687.22 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  40%|███▌     | 61417/154911 [01:42<01:58, 789.80 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  40%|███▌     | 61500/154911 [01:42<01:59, 783.48 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  40%|███▌     | 61581/154911 [01:42<02:02, 760.36 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  40%|███▌     | 61659/154911 [01:42<02:05, 744.20 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  40%|███▌     | 61735/154911 [01:42<02:19, 669.23 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  40%|███▌     | 61804/154911 [01:42<02:25, 640.31 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  40%|███▌     | 61870/154911 [01:43<02:26, 632.94 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  40%|███▌     | 61938/154911 [01:43<03:10, 487.19 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  40%|███▌     | 61994/154911 [01:43<03:08, 491.66 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  40%|███▌     | 62091/154911 [01:43<02:33, 602.79 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  40%|███▌     | 62160/154911 [01:43<02:30, 616.50 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  40%|███▌     | 62226/154911 [01:43<02:31, 610.79 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  40%|███▌     | 62290/154911 [01:43<02:33, 601.57 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  40%|███▌     | 62357/154911 [01:43<02:29, 619.82 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  40%|███▋     | 62432/154911 [01:44<02:20, 655.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  40%|███▋     | 62508/154911 [01:44<02:15, 679.80 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  40%|███▋     | 62577/154911 [01:44<02:15, 681.87 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  40%|███▋     | 62649/154911 [01:44<02:24, 636.80 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  41%|███▋     | 62746/154911 [01:44<02:07, 725.09 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  41%|███▋     | 62820/154911 [01:44<02:13, 688.36 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  41%|███▋     | 62915/154911 [01:44<02:01, 754.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  41%|███▋     | 63020/154911 [01:44<01:51, 826.60 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  41%|███▋     | 63127/154911 [01:44<01:43, 889.18 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  41%|███▋     | 63219/154911 [01:44<01:42, 897.80 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  41%|███▋     | 63324/154911 [01:45<01:37, 939.94 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  41%|███▋     | 63437/154911 [01:45<01:33, 981.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  41%|███▎    | 63610/154911 [01:45<01:16, 1190.32 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  41%|███▎    | 63730/154911 [01:45<01:17, 1180.13 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  41%|███▎    | 63861/154911 [01:45<01:15, 1204.14 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  41%|███▎    | 64001/154911 [01:45<01:12, 1259.21 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  41%|███▎    | 64128/154911 [01:45<01:17, 1176.71 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[rank: 5] Global seed set to 4016710040\r\n",
+      "initializing deepspeed distributed: GLOBAL_RANK: 5, MEMBER: 6/8\r\n",
+      "[2023-09-02 06:20:50,033] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented\r\n",
+      "\r",
+      "Map (num_proc=64):  41%|███▎    | 64247/154911 [01:45<01:23, 1092.29 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  42%|███▎    | 64359/154911 [01:45<01:28, 1028.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  42%|███▎    | 64465/154911 [01:46<01:30, 1002.96 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  42%|███▎    | 64570/154911 [01:46<01:28, 1015.20 examples/s][rank: 1] Global seed set to 4016710040\r\n",
+      "initializing deepspeed distributed: GLOBAL_RANK: 1, MEMBER: 2/8\r\n",
+      "[2023-09-02 06:20:50,377] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  42%|███▎    | 64699/154911 [01:46<01:22, 1091.38 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  42%|███▎    | 64842/154911 [01:46<01:15, 1186.29 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  42%|███▎    | 64964/154911 [01:46<01:15, 1192.87 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  42%|███▎    | 65087/154911 [01:46<01:17, 1165.60 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  42%|███▎    | 65205/154911 [01:46<01:17, 1157.33 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  42%|███▎    | 65334/154911 [01:46<01:15, 1193.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  42%|███▍    | 65458/154911 [01:46<01:14, 1206.44 examples/s][rank: 2] Global seed set to 4016710040\r\n",
+      "initializing deepspeed distributed: GLOBAL_RANK: 2, MEMBER: 3/8\r\n",
+      "[2023-09-02 06:20:51,124] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  42%|███▍    | 65594/154911 [01:47<01:12, 1239.45 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  42%|███▍    | 65720/154911 [01:47<01:21, 1090.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  43%|███▍    | 65916/154911 [01:47<01:07, 1323.21 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  43%|███▍    | 66064/154911 [01:47<01:05, 1365.18 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  43%|███▍    | 66241/154911 [01:47<00:59, 1478.44 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  43%|███▍    | 66451/154911 [01:47<00:53, 1657.11 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  43%|███▍    | 66675/154911 [01:47<00:48, 1822.05 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  43%|███▍    | 66876/154911 [01:47<00:46, 1875.74 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  43%|███▍    | 67068/154911 [01:47<00:47, 1860.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  43%|███▍    | 67256/154911 [01:47<00:48, 1822.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  44%|███▍    | 67452/154911 [01:48<00:47, 1859.10 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  44%|███▍    | 67673/154911 [01:48<00:44, 1947.24 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  44%|███▌    | 67917/154911 [01:48<00:41, 2087.85 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  44%|███▌    | 68130/154911 [01:48<00:42, 2051.63 examples/s][rank: 6] Global seed set to 4016710040\r\n",
+      "initializing deepspeed distributed: GLOBAL_RANK: 6, MEMBER: 7/8\r\n",
+      "[2023-09-02 06:20:52,577] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  44%|███▌    | 68338/154911 [01:48<00:43, 1980.04 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  44%|███▌    | 68538/154911 [01:48<00:44, 1921.03 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  44%|███▌    | 68731/154911 [01:48<00:45, 1904.71 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  44%|███▌    | 68923/154911 [01:48<00:45, 1900.74 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  45%|███▌    | 69114/154911 [01:48<00:47, 1792.76 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  45%|███▌    | 69325/154911 [01:49<00:45, 1880.31 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  45%|███▌    | 69541/154911 [01:49<00:43, 1945.93 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  45%|███▌    | 69742/154911 [01:49<00:43, 1951.22 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  45%|███▌    | 69943/154911 [01:49<00:43, 1965.71 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  45%|███▌    | 70143/154911 [01:49<00:43, 1964.10 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  45%|███▋    | 70342/154911 [01:49<00:44, 1903.89 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  46%|███▋    | 70536/154911 [01:49<00:46, 1831.11 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  46%|███▋    | 70782/154911 [01:49<00:46, 1827.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  46%|███▋    | 70966/154911 [01:49<00:46, 1813.32 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  46%|███▋    | 71148/154911 [01:50<00:46, 1792.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  46%|███▋    | 71328/154911 [01:50<00:46, 1785.47 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  46%|███▋    | 71507/154911 [01:50<00:48, 1717.45 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  46%|███▋    | 71681/154911 [01:50<00:48, 1717.25 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  46%|███▋    | 71858/154911 [01:50<00:47, 1732.26 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  47%|███▋    | 72049/154911 [01:50<00:46, 1765.74 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  47%|███▋    | 72247/154911 [01:50<00:45, 1824.79 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  47%|███▋    | 72452/154911 [01:50<00:43, 1890.00 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  47%|███▊    | 72711/154911 [01:50<00:39, 2085.19 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  47%|███▊    | 72922/154911 [01:50<00:39, 2083.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  47%|███▊    | 73131/154911 [01:51<00:41, 1951.81 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  47%|███▊    | 73354/154911 [01:51<00:40, 2028.44 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  47%|███▊    | 73559/154911 [01:51<00:40, 2020.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  48%|███▊    | 73764/154911 [01:51<00:41, 1967.39 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  48%|███▊    | 73963/154911 [01:51<00:42, 1907.37 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  48%|███▊    | 74155/154911 [01:51<00:43, 1845.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  48%|███▊    | 74341/154911 [01:51<00:44, 1818.10 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  48%|███▊    | 74525/154911 [01:51<00:45, 1775.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  48%|███▊    | 74703/154911 [01:51<00:46, 1741.28 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  48%|███▊    | 74878/154911 [01:52<00:46, 1730.39 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  48%|███▉    | 75072/154911 [01:52<00:44, 1781.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  49%|███▉    | 75251/154911 [01:52<00:46, 1726.95 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  49%|███▉    | 75429/154911 [01:52<00:45, 1739.81 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  49%|███▉    | 75618/154911 [01:52<00:44, 1772.17 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  49%|███▉    | 75816/154911 [01:52<00:43, 1829.90 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  49%|███▉    | 76105/154911 [01:52<00:37, 2127.31 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  49%|███▉    | 76320/154911 [01:52<00:38, 2029.03 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  49%|███▉    | 76533/154911 [01:52<00:39, 1989.81 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  50%|███▉    | 76748/154911 [01:52<00:38, 2033.12 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  50%|███▉    | 76953/154911 [01:53<00:39, 1980.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  50%|███▉    | 77152/154911 [01:53<00:39, 1965.88 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  50%|███▉    | 77351/154911 [01:53<00:40, 1905.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  50%|████    | 77544/154911 [01:53<00:42, 1829.68 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  50%|████    | 77729/154911 [01:53<00:43, 1783.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  50%|████    | 77940/154911 [01:53<00:41, 1874.49 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  50%|████    | 78130/154911 [01:53<00:41, 1863.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  51%|████    | 78357/154911 [01:53<00:38, 1974.82 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  51%|████    | 78559/154911 [01:53<00:43, 1774.37 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  51%|████    | 78758/154911 [01:54<00:41, 1828.50 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  51%|████    | 78946/154911 [01:54<00:41, 1829.71 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  51%|████    | 79133/154911 [01:54<00:43, 1760.41 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  51%|████    | 79313/154911 [01:54<00:43, 1754.97 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  51%|████    | 79490/154911 [01:54<00:43, 1743.22 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  51%|████    | 79668/154911 [01:54<00:44, 1696.09 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  52%|████    | 79870/154911 [01:54<00:42, 1777.90 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  52%|████▏   | 80050/154911 [01:54<00:42, 1762.60 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  52%|████▏   | 80227/154911 [01:54<00:42, 1759.08 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  52%|████▏   | 80404/154911 [01:55<00:44, 1678.25 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  52%|████▏   | 80573/154911 [01:55<00:44, 1670.37 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  52%|████▏   | 80752/154911 [01:55<00:43, 1692.46 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  52%|████▏   | 80922/154911 [01:55<00:44, 1680.77 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  52%|████▏   | 81127/154911 [01:55<00:41, 1782.03 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  52%|████▏   | 81311/154911 [01:55<00:41, 1791.54 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  53%|████▏   | 81491/154911 [01:55<00:43, 1680.24 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  53%|████▏   | 81716/154911 [01:55<00:39, 1836.82 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  53%|████▏   | 81910/154911 [01:55<00:39, 1849.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  53%|████▏   | 82098/154911 [01:55<00:41, 1744.14 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  53%|████▏   | 82276/154911 [01:56<00:44, 1637.54 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  53%|████▎   | 82443/154911 [01:56<00:46, 1549.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  53%|████▎   | 82601/154911 [01:56<00:48, 1488.60 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  53%|████▎   | 82754/154911 [01:56<00:50, 1440.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  54%|████▎   | 82901/154911 [01:56<00:51, 1405.05 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  54%|████▎   | 83054/154911 [01:56<00:50, 1436.22 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  54%|████▎   | 83269/154911 [01:56<00:44, 1620.84 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  54%|████▎   | 83507/154911 [01:56<00:38, 1835.19 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  54%|████▎   | 83696/154911 [01:56<00:38, 1837.86 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  54%|████▎   | 83883/154911 [01:57<00:42, 1662.08 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  54%|████▎   | 84056/154911 [01:57<00:44, 1592.40 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  54%|████▎   | 84220/154911 [01:57<00:46, 1530.76 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  54%|████▎   | 84377/154911 [01:57<00:46, 1527.13 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  55%|████▎   | 84532/154911 [01:57<00:46, 1506.99 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  55%|████▎   | 84686/154911 [01:57<00:47, 1481.85 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  55%|████▍   | 84835/154911 [01:57<00:47, 1472.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  55%|████▍   | 84995/154911 [01:57<00:46, 1505.76 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  55%|████▍   | 85148/154911 [01:57<00:47, 1476.00 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  55%|████▍   | 85310/154911 [01:58<00:45, 1513.26 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  55%|████▍   | 85467/154911 [01:58<00:45, 1524.76 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  55%|████▍   | 85634/154911 [01:58<00:44, 1549.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  55%|████▍   | 85808/154911 [01:58<00:43, 1582.88 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  55%|████▍   | 85971/154911 [01:58<00:43, 1596.28 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  56%|████▍   | 86140/154911 [01:58<00:42, 1623.09 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  56%|████▍   | 86305/154911 [01:58<00:42, 1600.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  56%|████▍   | 86467/154911 [01:58<00:43, 1576.38 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  56%|████▍   | 86625/154911 [01:58<00:43, 1570.45 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  56%|████▍   | 86783/154911 [01:59<00:43, 1563.40 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  56%|████▍   | 86954/154911 [01:59<00:42, 1593.00 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  56%|████▍   | 87115/154911 [01:59<00:42, 1594.05 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  56%|████▌   | 87281/154911 [01:59<00:42, 1596.20 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  56%|████▌   | 87445/154911 [01:59<00:42, 1592.74 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  57%|████▌   | 87617/154911 [01:59<00:41, 1625.96 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  57%|████▌   | 87797/154911 [01:59<00:40, 1676.07 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  57%|████▌   | 87991/154911 [01:59<00:38, 1752.14 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  57%|████▌   | 88177/154911 [01:59<00:37, 1780.09 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  57%|████▌   | 88356/154911 [01:59<00:37, 1761.58 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  57%|████▌   | 88533/154911 [02:00<00:39, 1671.49 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  57%|████▌   | 88703/154911 [02:00<00:40, 1625.55 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  57%|████▌   | 88879/154911 [02:00<00:39, 1652.08 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  57%|████▌   | 89046/154911 [02:00<00:40, 1638.27 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  58%|████▌   | 89211/154911 [02:00<00:41, 1582.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  58%|████▌   | 89370/154911 [02:00<00:44, 1465.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  58%|████▌   | 89542/154911 [02:00<00:43, 1512.08 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  58%|████▋   | 89700/154911 [02:00<00:42, 1519.91 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  58%|████▋   | 89854/154911 [02:00<00:49, 1325.28 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  58%|████▋   | 90084/154911 [02:01<00:41, 1576.80 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  58%|████▋   | 90249/154911 [02:01<00:42, 1531.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  58%|████▋   | 90409/154911 [02:01<00:42, 1517.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  58%|████▋   | 90566/154911 [02:01<00:43, 1489.70 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  59%|████▋   | 90726/154911 [02:01<00:42, 1508.63 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  59%|████▋   | 90880/154911 [02:01<00:42, 1504.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  59%|████▋   | 91034/154911 [02:01<00:42, 1511.93 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  59%|████▋   | 91187/154911 [02:01<00:42, 1492.87 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  59%|████▋   | 91338/154911 [02:01<00:42, 1490.56 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  59%|████▋   | 91488/154911 [02:02<00:42, 1482.38 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  59%|████▋   | 91667/154911 [02:02<00:40, 1572.22 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  59%|████▋   | 91857/154911 [02:02<00:37, 1667.79 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  59%|████▊   | 92025/154911 [02:02<00:38, 1644.47 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  60%|████▊   | 92190/154911 [02:02<00:40, 1542.58 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  60%|████▊   | 92346/154911 [02:02<00:40, 1541.82 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  60%|████▊   | 92503/154911 [02:02<00:40, 1525.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  60%|████▊   | 92658/154911 [02:02<00:41, 1495.02 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  60%|████▊   | 92810/154911 [02:02<00:41, 1484.21 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  60%|████▊   | 92961/154911 [02:02<00:42, 1465.87 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  60%|████▊   | 93110/154911 [02:03<00:42, 1463.40 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  60%|████▊   | 93266/154911 [02:03<00:41, 1485.90 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  60%|████▊   | 93425/154911 [02:03<00:40, 1513.86 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  60%|████▊   | 93578/154911 [02:03<00:40, 1502.30 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  61%|████▊   | 93729/154911 [02:03<00:42, 1453.80 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  61%|████▊   | 93886/154911 [02:03<00:41, 1475.62 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  61%|████▊   | 94036/154911 [02:03<00:41, 1482.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  61%|████▊   | 94191/154911 [02:03<00:41, 1473.45 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  61%|████▊   | 94379/154911 [02:03<00:38, 1585.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  61%|████▉   | 94578/154911 [02:04<00:35, 1703.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  61%|████▉   | 94749/154911 [02:04<00:35, 1686.10 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  61%|████▉   | 94920/154911 [02:04<00:37, 1620.00 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  61%|████▉   | 95083/154911 [02:04<00:37, 1585.29 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  61%|████▉   | 95243/154911 [02:04<00:38, 1546.90 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  62%|████▉   | 95401/154911 [02:04<00:39, 1501.84 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  62%|████▉   | 95553/154911 [02:04<00:40, 1478.52 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  62%|████▉   | 95702/154911 [02:04<00:41, 1411.40 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  62%|████▉   | 95844/154911 [02:04<00:42, 1395.37 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  62%|████▉   | 95984/154911 [02:05<00:42, 1380.96 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  62%|████▉   | 96123/154911 [02:05<00:43, 1349.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  62%|████▉   | 96266/154911 [02:05<00:43, 1363.03 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  62%|████▉   | 96406/154911 [02:05<00:42, 1367.95 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  62%|████▉   | 96553/154911 [02:05<00:41, 1392.38 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  62%|████▉   | 96693/154911 [02:05<00:45, 1282.45 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  63%|█████   | 96851/154911 [02:05<00:42, 1364.03 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  63%|█████   | 96991/154911 [02:05<00:43, 1328.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  63%|█████   | 97189/154911 [02:05<00:38, 1510.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  63%|█████   | 97343/154911 [02:05<00:37, 1515.95 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  63%|█████   | 97498/154911 [02:06<00:38, 1477.57 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  63%|█████   | 97647/154911 [02:06<00:39, 1439.05 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  63%|█████   | 97792/154911 [02:06<00:41, 1365.07 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  63%|█████   | 97936/154911 [02:06<00:41, 1385.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  63%|█████   | 98076/154911 [02:06<00:41, 1366.94 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  63%|█████   | 98215/154911 [02:06<00:41, 1351.15 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  63%|█████   | 98352/154911 [02:06<00:41, 1349.06 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  64%|█████   | 98488/154911 [02:06<00:43, 1311.52 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  64%|█████   | 98620/154911 [02:06<00:43, 1303.85 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  64%|█████   | 98752/154911 [02:07<00:44, 1271.47 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  64%|█████   | 98896/154911 [02:07<00:42, 1312.79 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  64%|█████   | 99062/154911 [02:07<00:39, 1411.99 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  64%|█████▏  | 99276/154911 [02:07<00:34, 1624.06 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  64%|█████▏  | 99441/154911 [02:07<00:35, 1559.63 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  64%|█████▏  | 99600/154911 [02:07<00:35, 1557.79 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  64%|█████▏  | 99758/154911 [02:07<00:37, 1466.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  64%|█████▏  | 99908/154911 [02:07<00:42, 1295.04 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  65%|████▌  | 100065/154911 [02:07<00:40, 1362.58 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  65%|████▌  | 100206/154911 [02:08<00:40, 1353.57 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  65%|████▌  | 100346/154911 [02:08<00:40, 1343.47 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  65%|████▌  | 100484/154911 [02:08<00:40, 1332.42 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  65%|████▌  | 100626/154911 [02:08<00:40, 1339.71 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  65%|████▌  | 100771/154911 [02:08<00:39, 1356.17 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  65%|████▌  | 100927/154911 [02:08<00:38, 1405.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  65%|████▌  | 101071/154911 [02:08<00:39, 1377.80 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  65%|████▌  | 101212/154911 [02:08<00:39, 1361.64 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  65%|████▌  | 101363/154911 [02:08<00:38, 1401.00 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  66%|████▌  | 101505/154911 [02:08<00:38, 1397.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  66%|████▌  | 101663/154911 [02:09<00:37, 1433.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  66%|████▌  | 101809/154911 [02:09<00:37, 1425.22 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  66%|████▌  | 101965/154911 [02:09<00:36, 1464.37 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  66%|████▌  | 102145/154911 [02:09<00:33, 1557.70 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  66%|████▌  | 102306/154911 [02:09<00:34, 1542.62 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  66%|████▋  | 102463/154911 [02:09<00:34, 1537.34 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  66%|████▋  | 102617/154911 [02:09<00:35, 1475.74 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  66%|████▋  | 102766/154911 [02:09<00:36, 1410.52 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  66%|████▋  | 102909/154911 [02:09<00:37, 1390.21 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  67%|████▋  | 103050/154911 [02:10<00:37, 1371.63 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  67%|████▋  | 103188/154911 [02:10<00:38, 1351.41 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  67%|████▋  | 103324/154911 [02:10<00:38, 1351.34 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  67%|████▋  | 103460/154911 [02:10<00:38, 1342.94 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  67%|████▋  | 103599/154911 [02:10<00:38, 1338.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  67%|████▋  | 103746/154911 [02:10<00:37, 1373.35 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  67%|████▋  | 103886/154911 [02:10<00:37, 1378.98 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  67%|████▋  | 104030/154911 [02:10<00:36, 1395.66 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  67%|████▋  | 104170/154911 [02:10<00:36, 1388.16 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  67%|████▋  | 104320/154911 [02:10<00:35, 1416.94 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  67%|████▋  | 104473/154911 [02:11<00:34, 1447.98 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  68%|████▋  | 104620/154911 [02:11<00:34, 1448.48 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  68%|████▋  | 104767/154911 [02:11<00:34, 1454.55 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  68%|████▋  | 104921/154911 [02:11<00:33, 1477.31 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  68%|████▋  | 105092/154911 [02:11<00:32, 1546.05 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  68%|████▊  | 105247/154911 [02:11<00:33, 1482.93 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  68%|████▊  | 105397/154911 [02:11<00:34, 1446.17 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  68%|████▊  | 105544/154911 [02:11<00:34, 1414.36 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  68%|████▊  | 105690/154911 [02:11<00:34, 1421.97 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  68%|████▊  | 105834/154911 [02:12<00:34, 1409.08 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  68%|████▊  | 105976/154911 [02:12<00:35, 1367.81 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  69%|████▊  | 106136/154911 [02:12<00:34, 1416.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  69%|████▊  | 106311/154911 [02:12<00:32, 1509.62 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  69%|████▊  | 106470/154911 [02:12<00:31, 1527.54 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  69%|████▊  | 106626/154911 [02:12<00:33, 1428.87 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  69%|████▊  | 106778/154911 [02:12<00:33, 1450.90 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  69%|████▊  | 106937/154911 [02:12<00:32, 1470.58 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  69%|████▊  | 107105/154911 [02:12<00:31, 1509.13 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  69%|████▊  | 107258/154911 [02:12<00:33, 1438.13 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  69%|████▊  | 107403/154911 [02:13<00:33, 1406.62 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  69%|████▊  | 107546/154911 [02:13<00:35, 1331.08 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  70%|████▊  | 107682/154911 [02:13<00:35, 1326.97 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  70%|████▊  | 107817/154911 [02:13<00:35, 1330.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  70%|████▉  | 107966/154911 [02:13<00:34, 1366.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  70%|████▉  | 108104/154911 [02:13<00:35, 1330.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  70%|████▉  | 108239/154911 [02:13<00:35, 1306.90 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  70%|████▉  | 108370/154911 [02:13<00:35, 1293.04 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  70%|████▉  | 108500/154911 [02:13<00:37, 1252.98 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  70%|████▉  | 108631/154911 [02:14<00:36, 1261.63 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  70%|████▉  | 108759/154911 [02:14<00:37, 1214.96 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  70%|████▉  | 108881/154911 [02:14<00:38, 1191.52 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  70%|████▉  | 109015/154911 [02:14<00:38, 1203.89 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  70%|████▉  | 109136/154911 [02:14<00:39, 1148.17 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  71%|████▉  | 109254/154911 [02:14<00:39, 1156.98 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  71%|████▉  | 109376/154911 [02:14<00:39, 1164.25 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  71%|████▉  | 109493/154911 [02:14<00:39, 1137.57 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  71%|████▉  | 109609/154911 [02:14<00:39, 1143.18 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  71%|████▉  | 109724/154911 [02:15<00:41, 1099.83 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  71%|████▉  | 109841/154911 [02:15<00:40, 1115.35 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  71%|████▉  | 109960/154911 [02:15<00:39, 1134.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  71%|████▉  | 110085/154911 [02:15<00:38, 1158.82 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  71%|████▉  | 110207/154911 [02:15<00:38, 1175.79 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  71%|████▉  | 110326/154911 [02:15<00:38, 1157.99 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  71%|████▉  | 110446/154911 [02:15<00:38, 1166.09 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  71%|████▉  | 110564/154911 [02:15<00:38, 1147.46 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  71%|█████  | 110679/154911 [02:15<00:38, 1139.27 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  72%|█████  | 110800/154911 [02:15<00:38, 1156.10 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  72%|█████  | 110919/154911 [02:16<00:37, 1160.03 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  72%|█████  | 111053/154911 [02:16<00:36, 1206.37 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  72%|█████  | 111234/154911 [02:16<00:31, 1379.88 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  72%|█████  | 111386/154911 [02:16<00:30, 1421.06 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  72%|█████  | 111530/154911 [02:16<00:33, 1291.76 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  72%|█████  | 111663/154911 [02:16<00:36, 1194.57 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  72%|█████  | 111786/154911 [02:16<00:36, 1168.04 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  72%|█████  | 111906/154911 [02:16<00:38, 1111.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  72%|█████  | 112020/154911 [02:16<00:38, 1114.51 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  72%|█████  | 112134/154911 [02:17<00:39, 1089.07 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  72%|█████  | 112244/154911 [02:17<00:39, 1089.63 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  73%|█████  | 112369/154911 [02:17<00:37, 1134.04 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  73%|█████  | 112484/154911 [02:17<00:37, 1138.10 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  73%|█████  | 112600/154911 [02:17<00:38, 1089.34 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  73%|█████  | 112711/154911 [02:17<00:39, 1061.23 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  73%|█████  | 112823/154911 [02:17<00:39, 1070.51 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  73%|█████  | 112948/154911 [02:17<00:37, 1120.80 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  73%|█████  | 113062/154911 [02:17<00:39, 1062.91 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  73%|█████  | 113179/154911 [02:18<00:38, 1083.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  73%|█████  | 113291/154911 [02:18<00:39, 1059.68 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  73%|█████  | 113403/154911 [02:18<00:39, 1062.30 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  73%|█████▏ | 113510/154911 [02:18<00:39, 1052.95 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  73%|█████▏ | 113622/154911 [02:18<00:38, 1070.29 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  73%|█████▏ | 113730/154911 [02:18<00:38, 1070.62 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  73%|█████▏ | 113843/154911 [02:18<00:38, 1068.03 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  74%|█████▏ | 113962/154911 [02:18<00:37, 1093.08 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  74%|█████▏ | 114089/154911 [02:18<00:35, 1142.99 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  74%|█████▏ | 114227/154911 [02:18<00:33, 1211.77 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  74%|█████▏ | 114398/154911 [02:19<00:29, 1356.88 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  74%|█████▏ | 114603/154911 [02:19<00:26, 1548.41 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  74%|█████▏ | 114760/154911 [02:19<00:27, 1465.26 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  74%|█████▏ | 114908/154911 [02:19<00:28, 1403.09 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  74%|█████▏ | 115050/154911 [02:19<00:30, 1319.16 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  74%|█████▏ | 115185/154911 [02:19<00:30, 1295.67 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  74%|█████▏ | 115317/154911 [02:19<00:31, 1256.90 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  75%|█████▏ | 115444/154911 [02:19<00:32, 1219.80 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  75%|█████▏ | 115567/154911 [02:19<00:33, 1178.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  75%|█████▏ | 115692/154911 [02:20<00:33, 1182.21 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  75%|█████▏ | 115811/154911 [02:20<00:33, 1156.96 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  75%|█████▏ | 115932/154911 [02:20<00:33, 1169.84 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  75%|█████▏ | 116050/154911 [02:20<00:33, 1160.71 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  75%|█████▏ | 116167/154911 [02:20<00:33, 1160.64 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  75%|█████▎ | 116287/154911 [02:20<00:33, 1169.63 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  75%|█████▎ | 116407/154911 [02:20<00:33, 1153.51 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  75%|█████▎ | 116524/154911 [02:20<00:33, 1152.89 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  75%|█████▎ | 116646/154911 [02:20<00:32, 1172.48 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  75%|█████▎ | 116764/154911 [02:21<00:32, 1163.29 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  75%|█████▎ | 116895/154911 [02:21<00:31, 1194.30 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  76%|█████▎ | 117015/154911 [02:21<00:33, 1136.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  76%|█████▎ | 117154/154911 [02:21<00:31, 1205.82 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  76%|█████▎ | 117276/154911 [02:21<00:32, 1158.84 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  76%|█████▎ | 117408/154911 [02:21<00:31, 1204.20 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  76%|█████▎ | 117532/154911 [02:21<00:30, 1212.31 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  76%|█████▎ | 117659/154911 [02:21<00:30, 1228.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  76%|█████▎ | 117791/154911 [02:21<00:29, 1255.16 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  76%|█████▎ | 117918/154911 [02:21<00:29, 1245.76 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  76%|█████▎ | 118044/154911 [02:22<00:29, 1241.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  76%|█████▎ | 118174/154911 [02:22<00:29, 1256.48 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  76%|█████▎ | 118301/154911 [02:22<00:29, 1258.85 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  76%|█████▎ | 118428/154911 [02:22<00:29, 1252.66 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  77%|█████▎ | 118554/154911 [02:22<00:29, 1232.36 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  77%|█████▎ | 118688/154911 [02:22<00:28, 1261.31 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  77%|█████▎ | 118815/154911 [02:22<00:29, 1225.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  77%|█████▎ | 118939/154911 [02:22<00:30, 1196.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  77%|█████▍ | 119061/154911 [02:22<00:29, 1201.64 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  77%|█████▍ | 119183/154911 [02:22<00:29, 1193.77 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  77%|█████▍ | 119303/154911 [02:23<00:30, 1179.09 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  77%|█████▍ | 119426/154911 [02:23<00:29, 1186.08 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  77%|█████▍ | 119549/154911 [02:23<00:29, 1194.99 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  77%|█████▍ | 119687/154911 [02:23<00:28, 1237.66 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  77%|█████▍ | 119825/154911 [02:23<00:27, 1278.02 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  77%|█████▍ | 119984/154911 [02:23<00:25, 1369.34 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  78%|█████▍ | 120181/154911 [02:23<00:22, 1545.64 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  78%|█████▍ | 120336/154911 [02:23<00:25, 1368.80 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  78%|█████▍ | 120477/154911 [02:23<00:26, 1305.62 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  78%|█████▍ | 120612/154911 [02:24<00:27, 1243.14 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  78%|█████▍ | 120747/154911 [02:24<00:27, 1264.50 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  78%|█████▍ | 120876/154911 [02:24<00:28, 1196.11 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  78%|█████▍ | 120998/154911 [02:24<00:29, 1158.80 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  78%|█████▍ | 121124/154911 [02:24<00:29, 1155.41 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  78%|█████▍ | 121248/154911 [02:24<00:28, 1177.18 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  78%|█████▍ | 121369/154911 [02:24<00:28, 1164.43 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  78%|█████▍ | 121487/154911 [02:24<00:29, 1129.35 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  78%|█████▍ | 121605/154911 [02:24<00:29, 1136.18 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  79%|█████▌ | 121720/154911 [02:25<00:29, 1109.33 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  79%|█████▌ | 121832/154911 [02:25<00:32, 1022.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  79%|█████▌ | 121965/154911 [02:25<00:29, 1099.47 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  79%|█████▌ | 122084/154911 [02:25<00:29, 1108.64 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  79%|█████▌ | 122197/154911 [02:25<00:29, 1101.72 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  79%|█████▌ | 122309/154911 [02:25<00:29, 1104.41 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  79%|█████▌ | 122421/154911 [02:25<00:29, 1106.98 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  79%|█████▌ | 122534/154911 [02:25<00:29, 1095.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  79%|█████▌ | 122644/154911 [02:25<00:29, 1080.71 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  79%|█████▌ | 122760/154911 [02:26<00:29, 1098.63 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  79%|█████▌ | 122871/154911 [02:26<00:29, 1095.51 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  79%|█████▌ | 122993/154911 [02:26<00:28, 1131.00 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  80%|█████▌ | 123181/154911 [02:26<00:23, 1349.11 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  80%|█████▌ | 123399/154911 [02:26<00:19, 1592.72 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  80%|█████▌ | 123559/154911 [02:26<00:20, 1505.08 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  80%|█████▌ | 123724/154911 [02:26<00:20, 1542.30 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  80%|█████▌ | 123881/154911 [02:26<00:20, 1536.39 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  80%|█████▌ | 124037/154911 [02:26<00:21, 1455.22 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  80%|█████▌ | 124184/154911 [02:26<00:22, 1376.05 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  80%|█████▌ | 124324/154911 [02:27<00:22, 1336.38 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  80%|█████▌ | 124460/154911 [02:27<00:22, 1325.33 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  80%|█████▋ | 124595/154911 [02:27<00:23, 1289.30 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  81%|█████▋ | 124725/154911 [02:27<00:23, 1265.88 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  81%|█████▋ | 124852/154911 [02:27<00:24, 1247.50 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  81%|█████▋ | 124980/154911 [02:27<00:24, 1246.00 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  81%|█████▋ | 125105/154911 [02:27<00:24, 1238.17 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  81%|█████▋ | 125230/154911 [02:27<00:24, 1211.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  81%|█████▋ | 125353/154911 [02:27<00:25, 1169.93 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  81%|█████▋ | 125472/154911 [02:28<00:25, 1173.81 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  81%|█████▋ | 125624/154911 [02:28<00:23, 1269.86 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  81%|█████▋ | 125771/154911 [02:28<00:21, 1325.72 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  81%|█████▋ | 125906/154911 [02:28<00:23, 1254.33 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  81%|█████▋ | 126033/154911 [02:28<00:23, 1213.99 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  81%|█████▋ | 126156/154911 [02:28<00:23, 1200.49 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  82%|█████▋ | 126278/154911 [02:28<00:24, 1177.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  82%|█████▋ | 126399/154911 [02:28<00:25, 1139.04 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  82%|█████▋ | 126515/154911 [02:28<00:25, 1112.24 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  82%|█████▋ | 126638/154911 [02:29<00:24, 1134.76 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  82%|█████▋ | 126754/154911 [02:29<00:25, 1122.07 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  82%|█████▋ | 126875/154911 [02:29<00:24, 1137.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  82%|█████▋ | 126990/154911 [02:29<00:25, 1102.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  82%|█████▋ | 127101/154911 [02:29<00:25, 1094.54 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  82%|█████▋ | 127215/154911 [02:29<00:25, 1105.96 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  82%|█████▊ | 127332/154911 [02:29<00:24, 1115.30 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  82%|█████▊ | 127444/154911 [02:29<00:24, 1101.91 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  82%|█████▊ | 127555/154911 [02:29<00:25, 1089.29 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  82%|█████▊ | 127665/154911 [02:29<00:25, 1073.45 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  82%|█████▊ | 127773/154911 [02:30<00:25, 1050.26 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  83%|█████▊ | 127891/154911 [02:30<00:25, 1078.04 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  83%|█████▊ | 128005/154911 [02:30<00:24, 1095.40 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  83%|█████▊ | 128122/154911 [02:30<00:24, 1109.38 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  83%|█████▊ | 128237/154911 [02:30<00:23, 1120.19 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  83%|█████▊ | 128350/154911 [02:30<00:23, 1113.29 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  83%|█████▊ | 128462/154911 [02:30<00:23, 1102.85 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  83%|█████▊ | 128573/154911 [02:30<00:23, 1102.12 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  83%|█████▊ | 128697/154911 [02:30<00:22, 1141.21 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  83%|█████▊ | 128817/154911 [02:31<00:22, 1158.60 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  83%|█████▊ | 128964/154911 [02:31<00:20, 1241.48 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  83%|█████▊ | 129099/154911 [02:31<00:20, 1266.27 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  83%|█████▊ | 129226/154911 [02:31<00:21, 1212.25 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  83%|█████▊ | 129348/154911 [02:31<00:22, 1151.84 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  84%|█████▊ | 129464/154911 [02:31<00:22, 1106.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  84%|█████▊ | 129576/154911 [02:31<00:23, 1073.84 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  84%|█████▊ | 129684/154911 [02:31<00:23, 1063.40 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  84%|█████▊ | 129793/154911 [02:31<00:23, 1060.25 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  84%|█████▊ | 129901/154911 [02:31<00:23, 1046.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  84%|█████▊ | 130006/154911 [02:32<00:23, 1042.02 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  84%|█████▉ | 130112/154911 [02:32<00:24, 1015.90 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  84%|██████▋ | 130214/154911 [02:32<00:25, 955.14 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  84%|██████▋ | 130321/154911 [02:32<00:24, 985.13 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  84%|██████▋ | 130421/154911 [02:32<00:24, 980.23 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  84%|██████▋ | 130520/154911 [02:32<00:25, 973.48 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  84%|██████▋ | 130626/154911 [02:32<00:24, 995.66 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  84%|██████▊ | 130726/154911 [02:32<00:25, 937.18 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  84%|██████▊ | 130841/154911 [02:32<00:24, 996.12 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  85%|█████▉ | 130949/154911 [02:33<00:23, 1017.83 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  85%|█████▉ | 131053/154911 [02:33<00:23, 1008.03 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  85%|██████▊ | 131156/154911 [02:33<00:23, 992.67 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  85%|█████▉ | 131264/154911 [02:33<00:23, 1011.66 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  85%|█████▉ | 131368/154911 [02:33<00:23, 1000.14 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  85%|█████▉ | 131478/154911 [02:33<00:22, 1025.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  85%|█████▉ | 131583/154911 [02:33<00:22, 1017.78 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  85%|██████▊ | 131687/154911 [02:33<00:23, 995.55 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  85%|█████▉ | 131797/154911 [02:33<00:22, 1024.95 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  85%|█████▉ | 131901/154911 [02:33<00:22, 1019.05 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  85%|█████▉ | 132004/154911 [02:34<00:22, 1016.71 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  85%|█████▉ | 132111/154911 [02:34<00:22, 1030.85 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  85%|█████▉ | 132215/154911 [02:34<00:22, 1000.41 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  85%|█████▉ | 132318/154911 [02:34<00:22, 1000.24 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  85%|██████▊ | 132421/154911 [02:34<00:22, 995.74 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  86%|██████▊ | 132522/154911 [02:34<00:22, 984.98 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  86%|█████▉ | 132629/154911 [02:34<00:22, 1009.27 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  86%|██████▊ | 132732/154911 [02:34<00:22, 986.55 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  86%|██████▊ | 132834/154911 [02:34<00:22, 992.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  86%|██████▊ | 132934/154911 [02:35<00:22, 994.24 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  86%|██████▊ | 133036/154911 [02:35<00:22, 968.88 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  86%|██████▉ | 133134/154911 [02:35<00:22, 970.22 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  86%|██████▉ | 133233/154911 [02:35<00:22, 968.39 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  86%|██████▉ | 133332/154911 [02:35<00:22, 974.40 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  86%|██████▉ | 133431/154911 [02:35<00:23, 929.27 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  86%|██████▉ | 133533/154911 [02:35<00:22, 954.82 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  86%|██████▉ | 133629/154911 [02:35<00:22, 932.02 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  86%|██████▉ | 133723/154911 [02:35<00:23, 903.60 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  86%|██████▉ | 133815/154911 [02:35<00:23, 885.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  86%|██████▉ | 133915/154911 [02:36<00:23, 907.96 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  87%|██████▉ | 134008/154911 [02:36<00:23, 898.57 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  87%|██████▉ | 134100/154911 [02:36<00:23, 890.36 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  87%|██████▉ | 134208/154911 [02:36<00:22, 927.31 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  87%|██████▉ | 134301/154911 [02:36<00:22, 927.48 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  87%|██████▉ | 134398/154911 [02:36<00:22, 926.72 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  87%|██████��� | 134492/154911 [02:36<00:22, 908.38 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  87%|██████▉ | 134584/154911 [02:36<00:22, 891.38 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  87%|██████▉ | 134675/154911 [02:36<00:23, 878.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  87%|██████▉ | 134770/154911 [02:37<00:22, 898.41 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  87%|██████▉ | 134861/154911 [02:37<00:23, 870.85 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  87%|██████ | 134999/154911 [02:37<00:19, 1014.88 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  87%|██████ | 135149/154911 [02:37<00:17, 1154.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  87%|██████ | 135288/154911 [02:37<00:16, 1221.44 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  87%|██████ | 135412/154911 [02:37<00:17, 1137.29 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  87%|██████ | 135528/154911 [02:37<00:18, 1059.29 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  88%|██████▏| 135636/154911 [02:37<00:19, 1002.80 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  88%|██████▏| 135743/154911 [02:37<00:18, 1012.89 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  88%|██████▏| 135861/154911 [02:38<00:18, 1051.85 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  88%|██████▏| 135969/154911 [02:38<00:18, 1029.83 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  88%|███████ | 136074/154911 [02:38<00:19, 967.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  88%|███████ | 136172/154911 [02:38<00:21, 889.71 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  88%|███████ | 136264/154911 [02:38<00:21, 850.58 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  88%|███████ | 136352/154911 [02:38<00:22, 822.81 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  88%|███████ | 136435/154911 [02:38<00:22, 822.71 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  88%|███████ | 136520/154911 [02:38<00:22, 799.78 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  88%|███████ | 136605/154911 [02:38<00:22, 803.28 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  88%|███████ | 136691/154911 [02:39<00:22, 792.90 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  88%|███████ | 136775/154911 [02:39<00:22, 805.24 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  88%|███████ | 136857/154911 [02:39<00:22, 804.50 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  88%|███████ | 136947/154911 [02:39<00:21, 824.09 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  88%|███████ | 137032/154911 [02:39<00:21, 831.19 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  89%|███████ | 137134/154911 [02:39<00:20, 883.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  89%|██████▏| 137291/154911 [02:39<00:16, 1082.85 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  89%|██████▏| 137401/154911 [02:39<00:16, 1074.13 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  89%|███████ | 137509/154911 [02:39<00:17, 969.68 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  89%|███████ | 137609/154911 [02:40<00:19, 905.62 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  89%|███████ | 137703/154911 [02:40<00:19, 874.58 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  89%|███████ | 137793/154911 [02:40<00:20, 834.45 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  89%|███████ | 137898/154911 [02:40<00:19, 877.40 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  89%|███████▏| 138015/154911 [02:40<00:17, 955.95 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  89%|███████▏| 138130/154911 [02:40<00:16, 996.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  89%|███████▏| 138232/154911 [02:40<00:17, 934.17 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  89%|███████▏| 138329/154911 [02:40<00:18, 878.16 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  89%|███████▏| 138428/154911 [02:40<00:18, 906.11 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  89%|███████▏| 138521/154911 [02:41<00:18, 882.95 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  89%|███████▏| 138613/154911 [02:41<00:18, 885.51 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  90%|███████▏| 138703/154911 [02:41<00:18, 888.24 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  90%|███████▏| 138793/154911 [02:41<00:18, 875.41 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  90%|███████▏| 138882/154911 [02:41<00:18, 873.25 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  90%|███████▏| 138992/154911 [02:41<00:17, 932.08 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  90%|███████▏| 139086/154911 [02:41<00:17, 924.70 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  90%|███████▏| 139204/154911 [02:41<00:15, 982.85 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  90%|███████▏| 139305/154911 [02:41<00:16, 970.78 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  90%|███████▏| 139404/154911 [02:42<00:16, 912.35 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  90%|███████▏| 139504/154911 [02:42<00:16, 934.39 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  90%|███████▏| 139599/154911 [02:42<00:17, 891.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  90%|███████▏| 139708/154911 [02:42<00:16, 946.68 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  90%|███████▏| 139805/154911 [02:42<00:15, 950.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  90%|███████▏| 139901/154911 [02:42<00:16, 930.31 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  90%|███████▏| 139996/154911 [02:42<00:16, 901.21 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  90%|███████▏| 140089/154911 [02:42<00:16, 898.62 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  90%|███████▏| 140180/154911 [02:42<00:16, 880.60 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  91%|███████▏| 140269/154911 [02:42<00:16, 872.39 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  91%|███████▏| 140367/154911 [02:43<00:16, 899.62 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  91%|███████▎| 140458/154911 [02:43<00:16, 877.05 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  91%|███████▎| 140557/154911 [02:43<00:16, 889.46 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  91%|███████▎| 140647/154911 [02:43<00:15, 891.68 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  91%|███████▎| 140738/154911 [02:43<00:15, 895.25 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  91%|███████▎| 140829/154911 [02:43<00:15, 880.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  91%|███████▎| 140920/154911 [02:43<00:15, 874.49 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  91%|███████▎| 141008/154911 [02:43<00:16, 860.95 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  91%|███████▎| 141103/154911 [02:43<00:15, 883.39 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  91%|███████▎| 141193/154911 [02:44<00:15, 859.13 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  91%|███████▎| 141288/154911 [02:44<00:15, 885.08 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  91%|███████▎| 141378/154911 [02:44<00:15, 873.26 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  91%|███████▎| 141467/154911 [02:44<00:15, 859.74 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  91%|███████▎| 141556/154911 [02:44<00:15, 864.58 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  91%|███████▎| 141643/154911 [02:44<00:15, 843.22 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  91%|███████▎| 141728/154911 [02:44<00:15, 836.76 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  92%|███████▎| 141812/154911 [02:44<00:16, 808.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  92%|███████▎| 141894/154911 [02:44<00:16, 793.31 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  92%|███████▎| 141975/154911 [02:44<00:16, 794.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  92%|███████▎| 142055/154911 [02:45<00:16, 794.94 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  92%|███████▎| 142135/154911 [02:45<00:16, 766.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  92%|███████▎| 142212/154911 [02:45<00:16, 759.64 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  92%|███████▎| 142290/154911 [02:45<00:16, 754.19 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  92%|███████▎| 142367/154911 [02:45<00:16, 755.43 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  92%|███████▎| 142445/154911 [02:45<00:16, 760.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  92%|███████▎| 142522/154911 [02:45<00:16, 749.81 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  92%|███████▎| 142598/154911 [02:45<00:16, 743.23 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  92%|███████▎| 142674/154911 [02:45<00:16, 739.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  92%|███████▎| 142749/154911 [02:46<00:16, 737.48 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  92%|███████▍| 142824/154911 [02:46<00:17, 710.62 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  92%|███████▍| 142907/154911 [02:46<00:16, 738.32 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  92%|███████▍| 142982/154911 [02:46<00:16, 738.60 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  92%|███████▍| 143065/154911 [02:46<00:15, 760.23 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  92%|███████▍| 143181/154911 [02:46<00:13, 876.11 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  92%|███████▍| 143272/154911 [02:46<00:13, 881.50 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  93%|███████▍| 143362/154911 [02:46<00:14, 824.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  93%|███████▍| 143447/154911 [02:46<00:15, 760.58 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  93%|███████▍| 143525/154911 [02:47<00:15, 717.96 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  93%|███████▍| 143599/154911 [02:47<00:15, 707.47 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  93%|███████▍| 143672/154911 [02:47<00:15, 710.84 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  93%|███████▍| 143745/154911 [02:47<00:16, 682.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  93%|███████▍| 143814/154911 [02:47<00:16, 677.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  93%|███████▍| 143887/154911 [02:47<00:16, 687.00 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  93%|███████▍| 143958/154911 [02:47<00:16, 681.27 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  93%|███████▍| 144049/154911 [02:47<00:14, 736.58 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  93%|███████▍| 144136/154911 [02:47<00:13, 773.03 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  93%|███████▍| 144214/154911 [02:47<00:13, 772.77 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  93%|███████▍| 144293/154911 [02:48<00:15, 675.44 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  93%|███████▍| 144363/154911 [02:48<00:16, 637.20 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  93%|███████▍| 144430/154911 [02:48<00:16, 621.93 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  93%|███████▍| 144495/154911 [02:48<00:17, 609.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  93%|███████▍| 144558/154911 [02:48<00:17, 577.38 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  93%|███████▍| 144618/154911 [02:48<00:17, 577.91 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  93%|███████▍| 144677/154911 [02:48<00:18, 567.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  93%|███████▍| 144735/154911 [02:48<00:17, 566.10 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  93%|███████▍| 144792/154911 [02:49<00:17, 566.99 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▍| 144849/154911 [02:49<00:17, 560.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▍| 144906/154911 [02:49<00:18, 552.82 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▍| 144963/154911 [02:49<00:18, 544.76 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▍| 145023/154911 [02:49<00:18, 544.60 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▍| 145078/154911 [02:49<00:18, 543.67 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▍| 145140/154911 [02:49<00:17, 551.66 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▍| 145202/154911 [02:49<00:17, 570.66 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▌| 145261/154911 [02:49<00:17, 542.25 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▌| 145316/154911 [02:49<00:17, 540.55 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▌| 145372/154911 [02:50<00:17, 531.82 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▌| 145427/154911 [02:50<00:18, 505.71 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▌| 145479/154911 [02:50<00:19, 480.93 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▌| 145544/154911 [02:50<00:19, 478.00 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▌| 145602/154911 [02:50<00:18, 503.05 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▌| 145656/154911 [02:50<00:18, 508.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▌| 145711/154911 [02:50<00:18, 505.22 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▌| 145770/154911 [02:50<00:17, 520.98 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▌| 145823/154911 [02:51<00:17, 516.29 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▌| 145876/154911 [02:51<00:18, 497.68 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▌| 145930/154911 [02:51<00:17, 508.55 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▌| 145982/154911 [02:51<00:17, 511.23 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▌| 146037/154911 [02:51<00:17, 512.19 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▌| 146092/154911 [02:51<00:17, 517.91 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▌| 146146/154911 [02:51<00:16, 519.70 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▌| 146202/154911 [02:51<00:16, 522.89 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▌| 146256/154911 [02:51<00:16, 515.16 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▌| 146317/154911 [02:51<00:16, 525.06 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  94%|███████▌| 146370/154911 [02:52<00:16, 510.42 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▌| 146422/154911 [02:52<00:16, 503.86 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▌| 146474/154911 [02:52<00:17, 476.58 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▌| 146530/154911 [02:52<00:17, 492.46 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▌| 146587/154911 [02:52<00:16, 512.05 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▌| 146639/154911 [02:52<00:16, 507.85 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▌| 146696/154911 [02:52<00:15, 522.76 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▌| 146749/154911 [02:52<00:15, 515.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▌| 146812/154911 [02:52<00:14, 546.95 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▌| 146867/154911 [02:53<00:15, 530.11 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▌| 146922/154911 [02:53<00:15, 511.76 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▌| 146979/154911 [02:53<00:15, 520.70 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▌| 147034/154911 [02:53<00:14, 528.09 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▌| 147088/154911 [02:53<00:14, 524.99 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▌| 147141/154911 [02:53<00:14, 521.77 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▌| 147195/154911 [02:53<00:15, 510.57 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▌| 147248/154911 [02:53<00:15, 507.09 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▌| 147299/154911 [02:53<00:15, 500.05 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▌| 147353/154911 [02:53<00:14, 509.23 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▌| 147416/154911 [02:54<00:13, 542.81 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▌| 147496/154911 [02:54<00:12, 611.44 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▌| 147581/154911 [02:54<00:10, 669.83 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▌| 147648/154911 [02:54<00:10, 663.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▋| 147715/154911 [02:54<00:11, 634.71 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▋| 147779/154911 [02:54<00:12, 585.26 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▋| 147839/154911 [02:54<00:12, 574.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  95%|███████▋| 147898/154911 [02:54<00:12, 548.30 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 147954/154911 [02:54<00:12, 539.30 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 148009/154911 [02:55<00:13, 524.57 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 148063/154911 [02:55<00:13, 524.77 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 148119/154911 [02:55<00:12, 533.57 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 148173/154911 [02:55<00:12, 534.10 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 148227/154911 [02:55<00:12, 518.28 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 148279/154911 [02:55<00:12, 513.31 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 148332/154911 [02:55<00:12, 510.12 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 148384/154911 [02:55<00:12, 509.26 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 148451/154911 [02:55<00:11, 549.49 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 148515/154911 [02:56<00:11, 568.24 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 148604/154911 [02:56<00:09, 644.46 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 148685/154911 [02:56<00:09, 678.56 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 148753/154911 [02:56<00:09, 617.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 148816/154911 [02:56<00:10, 579.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 148876/154911 [02:56<00:10, 552.30 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 148932/154911 [02:56<00:10, 551.07 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 148989/154911 [02:56<00:11, 527.13 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 149047/154911 [02:56<00:11, 525.34 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 149101/154911 [02:57<00:11, 517.33 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 149153/154911 [02:57<00:11, 515.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 149207/154911 [02:57<00:11, 504.42 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 149259/154911 [02:57<00:11, 499.47 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 149315/154911 [02:57<00:10, 515.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 149367/154911 [02:57<00:10, 504.25 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 149418/154911 [02:57<00:11, 484.10 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  96%|███████▋| 149468/154911 [02:57<00:11, 467.76 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▋| 149515/154911 [02:57<00:12, 449.39 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▋| 149561/154911 [02:58<00:12, 433.77 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▋| 149606/154911 [02:58<00:13, 401.54 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▋| 149649/154911 [02:58<00:13, 400.22 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▋| 149694/154911 [02:58<00:12, 404.56 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▋| 149736/154911 [02:58<00:13, 386.98 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▋| 149781/154911 [02:58<00:13, 387.05 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▋| 149822/154911 [02:58<00:13, 386.76 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▋| 149862/154911 [02:58<00:13, 381.90 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▋| 149902/154911 [02:58<00:13, 378.43 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▋| 149940/154911 [02:59<00:13, 366.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▋| 149978/154911 [02:59<00:13, 357.36 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▋| 150014/154911 [02:59<00:13, 351.17 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▋| 150051/154911 [02:59<00:13, 350.42 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▊| 150089/154911 [02:59<00:13, 355.04 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▊| 150130/154911 [02:59<00:13, 353.22 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▊| 150169/154911 [02:59<00:13, 360.57 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▊| 150207/154911 [02:59<00:13, 343.49 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▊| 150251/154911 [02:59<00:12, 358.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▊| 150291/154911 [03:00<00:12, 360.07 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▊| 150334/154911 [03:00<00:12, 363.81 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▊| 150380/154911 [03:00<00:12, 377.17 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▊| 150418/154911 [03:00<00:12, 373.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▊| 150456/154911 [03:00<00:12, 353.55 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▊| 150497/154911 [03:00<00:12, 365.79 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▊| 150534/154911 [03:00<00:12, 360.48 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▊| 150581/154911 [03:00<00:11, 381.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▊| 150630/154911 [03:00<00:10, 408.80 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▊| 150676/154911 [03:01<00:10, 418.07 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▊| 150722/154911 [03:01<00:09, 420.54 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▊| 150765/154911 [03:01<00:10, 412.55 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▊| 150808/154911 [03:01<00:10, 408.14 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▊| 150849/154911 [03:01<00:10, 403.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▊| 150891/154911 [03:01<00:09, 405.56 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▊| 150932/154911 [03:01<00:10, 392.93 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▊| 150973/154911 [03:01<00:09, 396.37 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  97%|███████▊| 151014/154911 [03:01<00:10, 382.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 151054/154911 [03:02<00:10, 380.21 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 151094/154911 [03:02<00:10, 380.25 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 151133/154911 [03:02<00:09, 381.58 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 151172/154911 [03:02<00:09, 381.36 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 151211/154911 [03:02<00:10, 369.89 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 151252/154911 [03:02<00:09, 379.86 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 151294/154911 [03:02<00:09, 387.68 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 151333/154911 [03:02<00:09, 381.04 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 151373/154911 [03:02<00:09, 372.33 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 151411/154911 [03:02<00:09, 370.31 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 151452/154911 [03:03<00:09, 358.51 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 151496/154911 [03:03<00:08, 380.83 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 151536/154911 [03:03<00:09, 367.63 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 151575/154911 [03:03<00:09, 359.20 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 151612/154911 [03:03<00:09, 360.71 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 151651/154911 [03:03<00:08, 364.34 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 151691/154911 [03:03<00:08, 373.62 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 151729/154911 [03:03<00:08, 363.83 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 151767/154911 [03:03<00:08, 368.16 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 151804/154911 [03:04<00:08, 366.43 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 151843/154911 [03:04<00:08, 372.20 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 151881/154911 [03:04<00:08, 370.93 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 151919/154911 [03:04<00:08, 361.26 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 151959/154911 [03:04<00:07, 370.42 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 151998/154911 [03:04<00:07, 372.11 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 152037/154911 [03:04<00:07, 361.38 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 152074/154911 [03:04<00:07, 362.82 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 152112/154911 [03:04<00:07, 355.27 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 152150/154911 [03:05<00:07, 358.52 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 152190/154911 [03:05<00:07, 370.29 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 152229/154911 [03:05<00:07, 364.85 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 152267/154911 [03:05<00:07, 352.26 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 152308/154911 [03:05<00:07, 360.87 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 152371/154911 [03:05<00:05, 435.51 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▊| 152477/154911 [03:05<00:03, 609.90 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  98%|███████▉| 152540/154911 [03:05<00:03, 606.91 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 152603/154911 [03:05<00:04, 552.07 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 152661/154911 [03:06<00:04, 453.96 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 152710/154911 [03:06<00:05, 420.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 152756/154911 [03:06<00:05, 365.70 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 152797/154911 [03:06<00:05, 353.18 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 152835/154911 [03:06<00:05, 346.42 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 152872/154911 [03:06<00:06, 329.55 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 152906/154911 [03:06<00:06, 313.94 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 152942/154911 [03:07<00:06, 308.82 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 152974/154911 [03:07<00:06, 311.30 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153007/154911 [03:07<00:06, 292.84 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153044/154911 [03:07<00:06, 303.35 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153077/154911 [03:07<00:05, 308.58 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153109/154911 [03:07<00:05, 300.66 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153143/154911 [03:07<00:05, 309.91 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153175/154911 [03:07<00:05, 304.08 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153207/154911 [03:07<00:05, 298.14 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153238/154911 [03:08<00:05, 288.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153267/154911 [03:08<00:05, 286.02 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153297/154911 [03:08<00:05, 269.71 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153326/154911 [03:08<00:05, 272.55 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153354/154911 [03:08<00:05, 270.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153383/154911 [03:08<00:05, 261.50 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153413/154911 [03:08<00:05, 268.64 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153441/154911 [03:08<00:05, 270.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153469/154911 [03:08<00:05, 267.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153501/154911 [03:09<00:05, 278.50 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153529/154911 [03:09<00:05, 266.49 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153557/154911 [03:09<00:05, 265.74 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153594/154911 [03:09<00:04, 293.71 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153624/154911 [03:09<00:04, 277.53 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153654/154911 [03:09<00:04, 256.34 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153686/154911 [03:09<00:04, 267.41 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153714/154911 [03:09<00:04, 261.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153742/154911 [03:09<00:04, 260.99 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153774/154911 [03:10<00:04, 275.35 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153803/154911 [03:10<00:04, 271.13 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153831/154911 [03:10<00:04, 265.79 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153858/154911 [03:10<00:04, 253.07 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153884/154911 [03:10<00:04, 249.11 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153911/154911 [03:10<00:03, 250.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153937/154911 [03:10<00:04, 243.15 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153963/154911 [03:10<00:03, 243.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 153989/154911 [03:10<00:03, 236.31 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 154017/154911 [03:11<00:03, 236.37 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 154047/154911 [03:11<00:03, 236.48 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 154074/154911 [03:11<00:03, 240.94 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 154100/154911 [03:11<00:03, 238.84 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64):  99%|███████▉| 154124/154911 [03:11<00:03, 235.18 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154152/154911 [03:11<00:03, 247.42 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154177/154911 [03:11<00:03, 241.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154203/154911 [03:11<00:02, 243.83 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154228/154911 [03:11<00:02, 236.21 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154252/154911 [03:12<00:02, 229.33 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154277/154911 [03:12<00:02, 225.41 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154305/154911 [03:12<00:02, 223.89 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154336/154911 [03:12<00:02, 228.57 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154364/154911 [03:12<00:02, 236.19 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154390/154911 [03:12<00:02, 236.50 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154417/154911 [03:12<00:02, 238.24 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154441/154911 [03:12<00:02, 225.42 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154464/154911 [03:12<00:02, 208.42 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154486/154911 [03:13<00:02, 205.50 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154507/154911 [03:13<00:02, 190.90 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154527/154911 [03:13<00:01, 192.47 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154547/154911 [03:13<00:01, 185.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154567/154911 [03:13<00:01, 188.52 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154611/154911 [03:13<00:01, 257.23 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154638/154911 [03:13<00:01, 202.02 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154662/154911 [03:14<00:01, 186.66 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154684/154911 [03:14<00:01, 174.17 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154703/154911 [03:14<00:01, 165.86 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154721/154911 [03:14<00:01, 155.34 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154739/154911 [03:14<00:01, 144.07 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154754/154911 [03:14<00:01, 122.30 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154767/154911 [03:14<00:01, 108.45 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154780/154911 [03:15<00:01, 104.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|███████▉| 154792/154911 [03:15<00:01, 100.81 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|████████▉| 154804/154911 [03:15<00:01, 97.90 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|████████▉| 154815/154911 [03:15<00:01, 92.96 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|████████▉| 154827/154911 [03:15<00:00, 92.31 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|████████▉| 154839/154911 [03:15<00:00, 91.67 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|████████▉| 154851/154911 [03:15<00:00, 91.33 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|████████▉| 154862/154911 [03:15<00:00, 88.76 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|████████▉| 154873/154911 [03:16<00:00, 86.89 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|████████▉| 154882/154911 [03:16<00:00, 77.72 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|████████▉| 154890/154911 [03:16<00:00, 72.83 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|████████▉| 154898/154911 [03:16<00:00, 69.28 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Map (num_proc=64): 100%|████████▉| 154906/154911 [03:16<00:00, 51.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                                                                                \r"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):   0%|                   | 0/154911 [00:00<?, ? examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):   1%|       | 1000/154911 [00:02<05:34, 459.54 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):   1%|       | 2000/154911 [00:02<03:19, 768.19 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):   2%|▏      | 3000/154911 [00:03<03:00, 843.07 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):   3%|▏      | 4000/154911 [00:05<03:01, 831.94 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):   3%|▏     | 5000/154911 [00:05<02:01, 1230.93 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):   4%|▏     | 6000/154911 [00:05<01:35, 1564.57 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):   5%|▎     | 7000/154911 [00:05<01:20, 1838.62 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):   5%|▎     | 8000/154911 [00:06<01:16, 1909.02 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):   6%|▎    | 10000/154911 [00:06<00:53, 2694.55 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):   7%|▎    | 11000/154911 [00:07<00:51, 2815.55 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):   7%|▎    | 11420/154911 [00:07<00:59, 2403.24 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):   8%|▍    | 12420/154911 [00:08<01:06, 2150.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):   9%|▍    | 13420/154911 [00:08<00:55, 2565.24 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):   9%|▍    | 14420/154911 [00:08<00:45, 3072.77 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  10%|▍    | 15420/154911 [00:08<00:46, 3015.70 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  11%|▌    | 16420/154911 [00:08<00:40, 3433.18 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  11%|▌    | 17420/154911 [00:09<00:38, 3580.05 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  12%|▌    | 18420/154911 [00:09<00:31, 4288.64 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  13%|▋    | 19420/154911 [00:09<00:26, 5061.81 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  13%|▋    | 20841/154911 [00:09<00:26, 5127.70 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  14%|▋    | 21841/154911 [00:10<00:40, 3288.37 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  15%|▋    | 22841/154911 [00:10<00:37, 3486.32 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  16%|▊    | 24261/154911 [00:10<00:36, 3599.36 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  17%|▊    | 26261/154911 [00:11<00:28, 4591.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  18%|▉    | 27261/154911 [00:11<00:28, 4413.84 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  19%|▉    | 28682/154911 [00:12<00:36, 3468.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  20%|▉    | 30682/154911 [00:12<00:28, 4338.27 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  20%|█    | 31682/154911 [00:12<00:28, 4346.60 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  21%|█    | 33103/154911 [00:12<00:27, 4367.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  22%|█    | 34103/154911 [00:13<00:29, 4162.52 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  23%|█▏   | 35524/154911 [00:13<00:24, 4793.58 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  24%|█▏   | 36524/154911 [00:13<00:33, 3579.17 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  24%|█▏   | 37524/154911 [00:14<00:30, 3848.29 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  25%|█▏   | 38524/154911 [00:14<00:30, 3828.52 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  26%|█▎   | 39524/154911 [00:14<00:25, 4601.80 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  26%|█▎   | 40524/154911 [00:14<00:26, 4377.13 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  27%|█▎   | 41945/154911 [00:15<00:29, 3857.71 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  28%|█▍   | 43945/154911 [00:15<00:21, 5124.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  30%|█▍   | 45945/154911 [00:15<00:16, 6441.62 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  31%|█▌   | 47365/154911 [00:15<00:15, 6913.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  31%|█▌   | 48786/154911 [00:16<00:27, 3802.25 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  32%|█▌   | 50207/154911 [00:16<00:24, 4237.32 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  33%|█▋   | 51628/154911 [00:17<00:24, 4197.04 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  34%|█▋   | 53049/154911 [00:17<00:28, 3582.19 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  36%|█▊   | 56470/154911 [00:17<00:18, 5290.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  37%|█▊   | 57470/154911 [00:18<00:19, 4986.39 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  38%|█▉   | 59470/154911 [00:18<00:18, 5051.94 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  40%|█▉   | 61470/154911 [00:18<00:15, 6137.05 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  41%|██   | 62891/154911 [00:19<00:15, 5899.43 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  41%|██   | 63891/154911 [00:19<00:16, 5461.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  42%|██   | 64891/154911 [00:19<00:15, 5919.92 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  43%|██▏  | 65891/154911 [00:19<00:14, 6337.39 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  44%|██▏  | 68312/154911 [00:20<00:23, 3676.07 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  45%|██▎  | 70312/154911 [00:20<00:16, 5093.36 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  46%|██▎  | 71733/154911 [00:21<00:18, 4426.84 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  48%|██▍  | 73733/154911 [00:21<00:15, 5226.79 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  49%|██▍  | 76154/154911 [00:22<00:20, 3878.81 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  50%|██▍  | 77154/154911 [00:22<00:18, 4303.67 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  51%|██▌  | 79154/154911 [00:22<00:15, 4924.90 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  52%|██▌  | 80154/154911 [00:22<00:16, 4445.17 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  53%|██▋  | 82154/154911 [00:23<00:12, 5887.48 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  54%|██▋  | 83154/154911 [00:23<00:12, 5901.77 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  57%|██▊  | 87575/154911 [00:23<00:07, 8819.98 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  58%|██▉  | 89417/154911 [00:24<00:13, 4843.40 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  59%|██▉  | 90838/154911 [00:24<00:13, 4792.61 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  59%|██▉  | 91838/154911 [00:24<00:12, 4998.91 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  60%|███  | 93259/154911 [00:25<00:12, 5025.16 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  61%|███  | 94259/154911 [00:25<00:10, 5567.34 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  61%|███  | 95259/154911 [00:25<00:10, 5698.80 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  62%|███  | 96259/154911 [00:25<00:13, 4473.50 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  63%|███▏ | 97259/154911 [00:26<00:15, 3832.45 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  63%|███▏ | 98259/154911 [00:26<00:14, 3803.09 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  64%|███▏ | 99101/154911 [00:26<00:14, 3929.82 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  65%|██▌ | 100101/154911 [00:26<00:13, 4024.31 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  65%|██▌ | 101101/154911 [00:27<00:11, 4763.93 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  66%|██▋ | 102522/154911 [00:27<00:10, 4970.36 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  67%|██▋ | 103943/154911 [00:27<00:11, 4356.11 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  68%|██▋ | 104943/154911 [00:27<00:10, 4792.70 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  68%|██▋ | 105943/154911 [00:28<00:16, 2900.23 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  69%|██▊ | 106943/154911 [00:28<00:13, 3609.60 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  70%|██▊ | 107943/154911 [00:28<00:12, 3629.47 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  71%|██▊ | 109943/154911 [00:29<00:08, 5374.89 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  72%|██▊ | 110785/154911 [00:29<00:15, 2815.56 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  72%|██▉ | 112206/154911 [00:30<00:12, 3336.85 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  73%|██▉ | 113627/154911 [00:30<00:14, 2808.32 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  75%|██▉ | 115627/154911 [00:30<00:09, 4182.60 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  76%|███ | 117048/154911 [00:32<00:14, 2626.99 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  76%|███ | 118469/154911 [00:32<00:13, 2762.05 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  77%|███ | 119311/154911 [00:32<00:14, 2433.37 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  78%|███ | 120311/154911 [00:33<00:12, 2864.96 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  78%|███▏| 121311/154911 [00:33<00:11, 2871.15 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  79%|███▏| 122311/154911 [00:33<00:11, 2923.96 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  80%|███▏| 123731/154911 [00:34<00:09, 3353.62 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  80%|███▏| 124571/154911 [00:34<00:10, 2812.12 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  81%|███▏| 125571/154911 [00:35<00:11, 2547.54 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  81%|███▎| 125991/154911 [00:35<00:11, 2559.31 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  82%|███▎| 126411/154911 [00:35<00:12, 2300.16 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  82%|███▎| 127411/154911 [00:35<00:09, 2837.04 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  83%|███▎| 128831/154911 [00:36<00:08, 2908.31 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  84%|███▎| 130251/154911 [00:36<00:07, 3296.82 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  85%|███▍| 132251/154911 [00:36<00:04, 4780.57 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  87%|███▍| 134251/154911 [00:36<00:03, 6073.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  87%|███▍| 135091/154911 [00:37<00:04, 4492.84 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  88%|███▌| 135931/154911 [00:38<00:10, 1811.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  89%|███▌| 137931/154911 [00:39<00:07, 2420.72 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  90%|███▌| 138931/154911 [00:39<00:05, 2671.36 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  90%|███▌| 139771/154911 [00:39<00:06, 2339.05 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  91%|███▋| 140771/154911 [00:40<00:05, 2768.89 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  92%|███▋| 141771/154911 [00:40<00:03, 3287.23 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  92%|███▋| 142611/154911 [00:41<00:06, 1888.66 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  93%|███▋| 143611/154911 [00:41<00:04, 2359.89 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  94%|███▋| 145031/154911 [00:41<00:03, 3051.32 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  95%|███▊| 146451/154911 [00:41<00:02, 3628.27 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  95%|███▊| 147291/154911 [00:42<00:02, 3346.41 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  96%|███▊| 148291/154911 [00:42<00:02, 3125.58 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  96%|███▊| 149291/154911 [00:43<00:01, 3112.35 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  97%|███▉| 150291/154911 [00:43<00:01, 2604.11 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  97%|███▉| 150711/154911 [00:44<00:03, 1288.88 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  98%|███▉| 151131/154911 [00:45<00:02, 1330.86 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  98%|███▉| 151551/154911 [00:45<00:02, 1451.03 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  98%|███▉| 151971/154911 [00:45<00:02, 1203.08 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  98%|███▉| 152391/154911 [00:45<00:01, 1391.30 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  99%|████▉| 152811/154911 [00:46<00:02, 934.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  99%|████▉| 153231/154911 [00:47<00:02, 688.85 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64):  99%|████▉| 153651/154911 [00:48<00:01, 773.85 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64): 100%|████▉| 154491/154911 [00:49<00:00, 678.14 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Filter (num_proc=64): 100%|█████| 154911/154911 [00:50<00:00, 771.54 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                                                                                \r"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (0/8 shards):   0%|         | 0/98578 [00:00<?, ? examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (0/8 shards):   3%| | 3000/98578 [00:00<00:05, 18527.31 examp"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (0/8 shards):   7%| | 7000/98578 [00:00<00:04, 20097.03 examp"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (0/8 shards):  11%| | 11000/98578 [00:00<00:04, 21169.98 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (1/8 shards):  13%|▏| 12323/98578 [00:00<00:04, 21169.98 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (1/8 shards):  15%|▏| 14323/98578 [00:00<00:03, 21233.33 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (1/8 shards):  19%|▏| 18323/98578 [00:00<00:03, 22135.19 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (1/8 shards):  23%|▏| 22323/98578 [00:01<00:03, 22970.66 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (2/8 shards):  25%|▎| 24646/98578 [00:01<00:03, 22970.66 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (2/8 shards):  27%|▎| 26646/98578 [00:01<00:03, 23065.81 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (2/8 shards):  31%|▎| 30646/98578 [00:01<00:02, 23753.83 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (2/8 shards):  35%|▎| 34646/98578 [00:01<00:02, 24541.91 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (3/8 shards):  38%|▍| 36968/98578 [00:01<00:02, 24541.91 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (3/8 shards):  40%|▍| 38968/98578 [00:01<00:02, 24818.09 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (3/8 shards):  44%|▍| 42968/98578 [00:01<00:02, 25747.69 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (3/8 shards):  48%|▍| 46968/98578 [00:01<00:01, 26252.00 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (4/8 shards):  50%|▌| 49290/98578 [00:02<00:01, 26252.00 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (4/8 shards):  52%|▌| 51290/98578 [00:02<00:01, 25473.05 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (4/8 shards):  56%|▌| 55290/98578 [00:02<00:01, 26262.70 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (4/8 shards):  60%|▌| 59290/98578 [00:02<00:01, 26827.24 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (5/8 shards):  63%|▋| 61612/98578 [00:02<00:01, 26827.24 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (5/8 shards):  65%|▋| 63612/98578 [00:02<00:01, 26835.26 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (5/8 shards):  69%|▋| 67612/98578 [00:02<00:01, 26690.16 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (5/8 shards):  73%|▋| 71612/98578 [00:02<00:00, 27219.18 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (6/8 shards):  75%|▊| 73934/98578 [00:02<00:00, 27219.18 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (6/8 shards):  77%|▊| 75934/98578 [00:03<00:00, 27394.39 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (6/8 shards):  80%|▊| 78934/98578 [00:03<00:00, 25879.88 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (6/8 shards):  84%|▊| 82934/98578 [00:03<00:00, 26942.57 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (6/8 shards):  88%|▉| 86256/98578 [00:03<00:00, 27554.84 exam\r",
+      "Saving the dataset (7/8 shards):  88%|▉| 86256/98578 [00:03<00:00, 27554.84 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (7/8 shards):  92%|▉| 90256/98578 [00:03<00:00, 27517.24 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (7/8 shards):  96%|▉| 94256/98578 [00:03<00:00, 28137.60 exam"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (7/8 shards): 100%|▉| 98256/98578 [00:03<00:00, 28717.59 exam\r",
+      "Saving the dataset (8/8 shards): 100%|█| 98578/98578 [00:03<00:00, 28717.59 exam\r",
+      "                                                                                \r"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Saving the dataset (0/1 shards):   0%|            | 0/99 [00:00<?, ? examples/s]\r",
+      "Saving the dataset (1/1 shards): 100%|█| 99/99 [00:00<00:00, 7058.12 examples/s]\r",
+      "                                                                                \r"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[rank: 0] Global seed set to 4016710040\r\n",
+      "initializing deepspeed distributed: GLOBAL_RANK: 0, MEMBER: 1/8\r\n",
+      "[2023-09-02 06:23:32,173] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Enabling DeepSpeed BF16.\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\r\n",
+      "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\r\n",
+      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\r\n",
+      "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\r\n",
+      "LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\r\n",
+      "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\r\n",
+      "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\r\n",
+      "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\r\n",
+      "#\r\n",
+      "# RWKV lighting_trainer.py important notes \r\n",
+      "# https://github.com/RWKV/RWKV-infctx-trainer \r\n",
+      "#\r\n",
+      "# - Ensure your host is not running cuda 12.0 (use either 11.8, or >=12.1), as this is known to have freeze issues\r\n",
+      "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\r\n",
+      "# - When resuming from checkpoint, the estimated time is inaccurate\r\n",
+      "#\r\n",
+      "\r\n",
+      "[RWKV.model] Configuring optimizer with\r\n",
+      "    - lr_init:  3.000e-04 (0.0003)\r\n",
+      "    - lr_final: 1.000e-04 (0.0001)\r\n",
+      "\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Detected CUDA files, patching ldflags\r\n",
+      "Emitting ninja build file /root/.cache/torch_extensions/py311_cu118/fused_adam/build.ninja...\r\n",
+      "Building extension module fused_adam...\r\n",
+      "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ninja: no work to do.\r\n",
+      "Loading extension module fused_adam...\r\n",
+      "Time to load fused_adam op: 0.06581354141235352 seconds\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Loading extension module fused_adam...\r\n",
+      "Time to load fused_adam op: 0.10160326957702637 seconds\r\n",
+      "Loading extension module fused_adam...\r\n",
+      "Loading extension module fused_adam...\r\n",
+      "Loading extension module fused_adam...\r\n",
+      "Loading extension module fused_adam...\r\n",
+      "Time to load fused_adam op: 0.10122323036193848 seconds\r\n",
+      "Time to load fused_adam op: 0.10131621360778809 seconds\r\n",
+      "Time to load fused_adam op: 0.10161590576171875 seconds\r\n",
+      "Time to load fused_adam op: 0.10172796249389648 seconds\r\n",
+      "Loading extension module fused_adam...\r\n",
+      "Time to load fused_adam op: 0.10164880752563477 seconds\r\n",
+      "Loading `train_dataloader` to estimate number of stepping batches.\r\n",
+      "Loading extension module fused_adam...\r\n",
+      "Time to load fused_adam op: 0.10137581825256348 seconds\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Emitting ninja build file /root/.cache/torch_extensions/py311_cu118/utils/build.ninja...\r\n",
+      "Building extension module utils...\r\n",
+      "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ninja: no work to do.\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.0746297836303711 seconds\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.10225343704223633 seconds\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Loading extension module utils...\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.1022336483001709 seconds\r\n",
+      "Time to load utils op: 0.10247349739074707 seconds\r\n",
+      "Loading extension module utils...\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.1032721996307373 seconds\r\n",
+      "Time to load utils op: 0.10237407684326172 seconds\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.10254526138305664 seconds\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.10282373428344727 seconds\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Rank: 5 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Rank: 1 partition count [8, 8] and sizes[(176584448, False), (384, False)] "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Rank: 2 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Rank: 4 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Rank: 7 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Rank: 0 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Rank: 3 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Rank: 6 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "No modifications detected for re-loaded extension module utils, skipping build step...\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.0006437301635742188 seconds\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "No modifications detected for re-loaded extension module utils, skipping build step...\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.0009703636169433594 seconds\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "No modifications detected for re-loaded extension module utils, skipping build step...\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.0006546974182128906 seconds\r\n",
+      "No modifications detected for re-loaded extension module utils, skipping build step...\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.0006368160247802734 seconds\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "No modifications detected for re-loaded extension module utils, skipping build step...\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.0006356239318847656 seconds\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "No modifications detected for re-loaded extension module utils, skipping build step...\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.0006177425384521484 seconds\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "No modifications detected for re-loaded extension module utils, skipping build step...\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.0008919239044189453 seconds\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "No modifications detected for re-loaded extension module utils, skipping build step...\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.0009381771087646484 seconds\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r\n",
+      "  | Name   | Type       | Params\r\n",
+      "--------------------------------------\r\n",
+      "0 | emb    | Embedding  | 51.5 M\r\n",
+      "1 | blocks | ModuleList | 1.3 B \r\n",
+      "2 | ln_out | LayerNorm  | 2.0 K \r\n",
+      "3 | head   | Linear     | 51.5 M\r\n",
+      "--------------------------------------\r\n",
+      "1.4 B     Trainable params\r\n",
+      "0         Non-trainable params\r\n",
+      "1.4 B     Total params\r\n",
+      "5,650.715 Total estimated model params size (MB)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Training: 0it [00:00, ?it/s]\r",
+      "Training:   0%|                                       | 0/12323 [00:00<?, ?it/s]\r",
+      "Epoch 0:   0%|                                        | 0/12323 [00:00<?, ?it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%|                             | 1/12323 [00:15<54:17:40, 15.86s/it]\r",
+      "Epoch 0:   0%| | 1/12323 [00:15<54:17:57, 15.86s/it, v_num=i2o7, train/loss=0.50"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 2/12323 [00:19<34:07:52,  9.97s/it, v_num=i2o7, train/loss=0.50\r",
+      "Epoch 0:   0%| | 2/12323 [00:19<34:07:55,  9.97s/it, v_num=i2o7, train/loss=0.12"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 3/12323 [00:23<26:51:33,  7.85s/it, v_num=i2o7, train/loss=0.12\r",
+      "Epoch 0:   0%| | 3/12323 [00:23<26:51:35,  7.85s/it, v_num=i2o7, train/loss=0.10"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 4/12323 [00:32<28:02:51,  8.20s/it, v_num=i2o7, train/loss=0.10\r",
+      "Epoch 0:   0%| | 4/12323 [00:32<28:02:54,  8.20s/it, v_num=i2o7, train/loss=8.25"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 5/12323 [00:37<25:55:19,  7.58s/it, v_num=i2o7, train/loss=8.25\r",
+      "Epoch 0:   0%| | 5/12323 [00:37<25:55:21,  7.58s/it, v_num=i2o7, train/loss=0.53"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 6/12323 [00:41<23:46:16,  6.95s/it, v_num=i2o7, train/loss=0.53\r",
+      "Epoch 0:   0%| | 6/12323 [00:41<23:46:18,  6.95s/it, v_num=i2o7, train/loss=0.24"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 7/12323 [00:43<21:29:50,  6.28s/it, v_num=i2o7, train/loss=0.24\r",
+      "Epoch 0:   0%| | 7/12323 [00:43<21:29:51,  6.28s/it, v_num=i2o7, train/loss=0.09"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 8/12323 [00:50<21:33:12,  6.30s/it, v_num=i2o7, train/loss=0.09\r",
+      "Epoch 0:   0%| | 8/12323 [00:50<21:33:13,  6.30s/it, v_num=i2o7, train/loss=5.22"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 9/12323 [00:57<21:57:52,  6.42s/it, v_num=i2o7, train/loss=5.22\r",
+      "Epoch 0:   0%| | 9/12323 [00:57<21:57:53,  6.42s/it, v_num=i2o7, train/loss=6.00"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 10/12323 [01:05<22:17:54,  6.52s/it, v_num=i2o7, train/loss=6.0\r",
+      "Epoch 0:   0%| | 10/12323 [01:05<22:17:56,  6.52s/it, v_num=i2o7, train/loss=6.7"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 11/12323 [01:09<21:45:45,  6.36s/it, v_num=i2o7, train/loss=6.7\r",
+      "Epoch 0:   0%| | 11/12323 [01:09<21:45:46,  6.36s/it, v_num=i2o7, train/loss=0.6"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 12/12323 [01:11<20:18:23,  5.94s/it, v_num=i2o7, train/loss=0.6\r",
+      "Epoch 0:   0%| | 12/12323 [01:11<20:18:24,  5.94s/it, v_num=i2o7, train/loss=0.0"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 13/12323 [01:17<20:24:40,  5.97s/it, v_num=i2o7, train/loss=0.0\r",
+      "Epoch 0:   0%| | 13/12323 [01:17<20:24:41,  5.97s/it, v_num=i2o7, train/loss=5.6"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 14/12323 [01:25<20:59:28,  6.14s/it, v_num=i2o7, train/loss=5.6\r",
+      "Epoch 0:   0%| | 14/12323 [01:25<20:59:29,  6.14s/it, v_num=i2o7, train/loss=8.1"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 15/12323 [01:31<20:49:13,  6.09s/it, v_num=i2o7, train/loss=8.1\r",
+      "Epoch 0:   0%| | 15/12323 [01:31<20:49:14,  6.09s/it, v_num=i2o7, train/loss=1.2"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 16/12323 [01:36<20:40:32,  6.05s/it, v_num=i2o7, train/loss=1.2\r",
+      "Epoch 0:   0%| | 16/12323 [01:36<20:40:33,  6.05s/it, v_num=i2o7, train/loss=1.7"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 17/12323 [01:38<19:51:29,  5.81s/it, v_num=i2o7, train/loss=1.7\r",
+      "Epoch 0:   0%| | 17/12323 [01:38<19:51:29,  5.81s/it, v_num=i2o7, train/loss=0.0"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 18/12323 [01:41<19:16:26,  5.64s/it, v_num=i2o7, train/loss=0.0\r",
+      "Epoch 0:   0%| | 18/12323 [01:41<19:16:26,  5.64s/it, v_num=i2o7, train/loss=0.1"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 19/12323 [01:44<18:50:18,  5.51s/it, v_num=i2o7, train/loss=0.1\r",
+      "Epoch 0:   0%| | 19/12323 [01:44<18:50:18,  5.51s/it, v_num=i2o7, train/loss=0.1"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 20/12323 [01:53<19:19:22,  5.65s/it, v_num=i2o7, train/loss=0.1\r",
+      "Epoch 0:   0%| | 20/12323 [01:53<19:19:22,  5.65s/it, v_num=i2o7, train/loss=8.1"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 21/12323 [01:55<18:48:28,  5.50s/it, v_num=i2o7, train/loss=8.1\r",
+      "Epoch 0:   0%| | 21/12323 [01:55<18:48:29,  5.50s/it, v_num=i2o7, train/loss=0.1"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 22/12323 [01:58<18:27:39,  5.40s/it, v_num=i2o7, train/loss=0.1\r",
+      "Epoch 0:   0%| | 22/12323 [01:58<18:27:39,  5.40s/it, v_num=i2o7, train/loss=0.1"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 23/12323 [02:03<18:17:47,  5.36s/it, v_num=i2o7, train/loss=0.1\r",
+      "Epoch 0:   0%| | 23/12323 [02:03<18:17:47,  5.36s/it, v_num=i2o7, train/loss=0.3"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 24/12323 [02:07<18:08:10,  5.31s/it, v_num=i2o7, train/loss=0.3\r",
+      "Epoch 0:   0%| | 24/12323 [02:07<18:08:10,  5.31s/it, v_num=i2o7, train/loss=0.2"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 25/12323 [02:11<17:59:05,  5.26s/it, v_num=i2o7, train/loss=0.2\r",
+      "Epoch 0:   0%| | 25/12323 [02:11<17:59:05,  5.26s/it, v_num=i2o7, train/loss=0.3"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 26/12323 [02:14<17:42:35,  5.18s/it, v_num=i2o7, train/loss=0.3\r",
+      "Epoch 0:   0%| | 26/12323 [02:14<17:42:36,  5.18s/it, v_num=i2o7, train/loss=0.1"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 27/12323 [02:21<17:57:34,  5.26s/it, v_num=i2o7, train/loss=0.1\r",
+      "Epoch 0:   0%| | 27/12323 [02:21<17:57:35,  5.26s/it, v_num=i2o7, train/loss=7.1"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 28/12323 [02:24<17:35:13,  5.15s/it, v_num=i2o7, train/loss=7.1\r",
+      "Epoch 0:   0%| | 28/12323 [02:24<17:35:13,  5.15s/it, v_num=i2o7, train/loss=0.0"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 29/12323 [02:28<17:31:47,  5.13s/it, v_num=i2o7, train/loss=0.0\r",
+      "Epoch 0:   0%| | 29/12323 [02:28<17:31:47,  5.13s/it, v_num=i2o7, train/loss=0.9"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 30/12323 [02:35<17:39:22,  5.17s/it, v_num=i2o7, train/loss=0.9\r",
+      "Epoch 0:   0%| | 30/12323 [02:35<17:39:22,  5.17s/it, v_num=i2o7, train/loss=5.6"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 31/12323 [02:40<17:40:10,  5.17s/it, v_num=i2o7, train/loss=5.6\r",
+      "Epoch 0:   0%| | 31/12323 [02:40<17:40:10,  5.17s/it, v_num=i2o7, train/loss=1.8"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 32/12323 [03:21<21:27:07,  6.28s/it, v_num=i2o7, train/loss=1.8"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 32/12323 [03:23<21:45:52,  6.37s/it, v_num=i2o7, train/loss=4.7"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 33/12323 [03:27<21:30:07,  6.30s/it, v_num=i2o7, train/loss=4.7\r",
+      "Epoch 0:   0%| | 33/12323 [03:27<21:30:07,  6.30s/it, v_num=i2o7, train/loss=11."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 34/12323 [03:29<20:59:29,  6.15s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 34/12323 [03:29<20:59:29,  6.15s/it, v_num=i2o7, train/loss=8.8"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 35/12323 [03:30<20:30:35,  6.01s/it, v_num=i2o7, train/loss=8.8\r",
+      "Epoch 0:   0%| | 35/12323 [03:30<20:30:35,  6.01s/it, v_num=i2o7, train/loss=6.6"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 36/12323 [03:38<20:42:57,  6.07s/it, v_num=i2o7, train/loss=6.6\r",
+      "Epoch 0:   0%| | 36/12323 [03:38<20:42:57,  6.07s/it, v_num=i2o7, train/loss=10."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 37/12323 [03:44<20:43:49,  6.07s/it, v_num=i2o7, train/loss=10.\r",
+      "Epoch 0:   0%| | 37/12323 [03:44<20:43:49,  6.07s/it, v_num=i2o7, train/loss=11."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 38/12323 [03:51<20:49:55,  6.10s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 38/12323 [03:51<20:49:55,  6.10s/it, v_num=i2o7, train/loss=10."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 39/12323 [03:54<20:29:47,  6.01s/it, v_num=i2o7, train/loss=10.\r",
+      "Epoch 0:   0%| | 39/12323 [03:54<20:29:48,  6.01s/it, v_num=i2o7, train/loss=11."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 40/12323 [03:58<20:20:40,  5.96s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 40/12323 [03:58<20:20:40,  5.96s/it, v_num=i2o7, train/loss=11."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 41/12323 [04:03<20:16:55,  5.94s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 41/12323 [04:03<20:16:55,  5.94s/it, v_num=i2o7, train/loss=11."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 42/12323 [04:07<20:06:08,  5.89s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 42/12323 [04:07<20:06:08,  5.89s/it, v_num=i2o7, train/loss=12."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 43/12323 [04:12<20:00:16,  5.86s/it, v_num=i2o7, train/loss=12.\r",
+      "Epoch 0:   0%| | 43/12323 [04:12<20:00:16,  5.86s/it, v_num=i2o7, train/loss=11."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 44/12323 [04:14<19:43:10,  5.78s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 44/12323 [04:14<19:43:10,  5.78s/it, v_num=i2o7, train/loss=11."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 45/12323 [04:21<19:49:25,  5.81s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 45/12323 [04:21<19:49:25,  5.81s/it, v_num=i2o7, train/loss=11."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 46/12323 [04:27<19:51:16,  5.82s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 46/12323 [04:27<19:51:16,  5.82s/it, v_num=i2o7, train/loss=11."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 47/12323 [04:32<19:48:18,  5.81s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 47/12323 [04:32<19:48:18,  5.81s/it, v_num=i2o7, train/loss=11."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 48/12323 [04:41<19:58:01,  5.86s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 48/12323 [04:41<19:58:01,  5.86s/it, v_num=i2o7, train/loss=10."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 49/12323 [04:45<19:51:03,  5.82s/it, v_num=i2o7, train/loss=10.\r",
+      "Epoch 0:   0%| | 49/12323 [04:45<19:51:03,  5.82s/it, v_num=i2o7, train/loss=11."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 50/12323 [04:48<19:38:26,  5.76s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 50/12323 [04:48<19:38:26,  5.76s/it, v_num=i2o7, train/loss=11."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 51/12323 [04:49<19:22:20,  5.68s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 51/12323 [04:49<19:22:20,  5.68s/it, v_num=i2o7, train/loss=11."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 52/12323 [04:54<19:16:46,  5.66s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 52/12323 [04:54<19:16:46,  5.66s/it, v_num=i2o7, train/loss=11."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 53/12323 [04:59<19:15:27,  5.65s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 53/12323 [04:59<19:15:27,  5.65s/it, v_num=i2o7, train/loss=11."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 54/12323 [05:02<19:06:28,  5.61s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 54/12323 [05:02<19:06:28,  5.61s/it, v_num=i2o7, train/loss=12."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 55/12323 [05:04<18:50:12,  5.53s/it, v_num=i2o7, train/loss=12.\r",
+      "Epoch 0:   0%| | 55/12323 [05:04<18:50:12,  5.53s/it, v_num=i2o7, train/loss=10."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 56/12323 [05:11<18:56:25,  5.56s/it, v_num=i2o7, train/loss=10.\r",
+      "Epoch 0:   0%| | 56/12323 [05:11<18:56:25,  5.56s/it, v_num=i2o7, train/loss=11."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 57/12323 [05:14<18:48:05,  5.52s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 57/12323 [05:14<18:48:05,  5.52s/it, v_num=i2o7, train/loss=11."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 58/12323 [05:19<18:47:47,  5.52s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 58/12323 [05:19<18:47:47,  5.52s/it, v_num=i2o7, train/loss=11."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 59/12323 [05:26<18:50:49,  5.53s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 59/12323 [05:26<18:50:49,  5.53s/it, v_num=i2o7, train/loss=11."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 60/12323 [05:28<18:39:44,  5.48s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 60/12323 [05:28<18:39:44,  5.48s/it, v_num=i2o7, train/loss=11."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   0%| | 61/12323 [05:34<18:39:19,  5.48s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 61/12323 [05:34<18:39:19,  5.48s/it, v_num=i2o7, train/loss=11."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   1%| | 62/12323 [05:35<18:27:05,  5.42s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   1%| | 62/12323 [05:35<18:27:05,  5.42s/it, v_num=i2o7, train/loss=10."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   1%| | 63/12323 [05:40<18:25:10,  5.41s/it, v_num=i2o7, train/loss=10.\r",
+      "Epoch 0:   1%| | 63/12323 [05:40<18:25:10,  5.41s/it, v_num=i2o7, train/loss=11."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   1%| | 64/12323 [06:37<21:09:47,  6.21s/it, v_num=i2o7, train/loss=11."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   1%| | 64/12323 [06:40<21:19:10,  6.26s/it, v_num=i2o7, train/loss=11."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   1%| | 65/12323 [06:47<21:19:32,  6.26s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   1%| | 65/12323 [06:47<21:19:32,  6.26s/it, v_num=i2o7, train/loss=9.1"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   1%| | 66/12323 [06:50<21:11:53,  6.23s/it, v_num=i2o7, train/loss=9.1\r",
+      "Epoch 0:   1%| | 66/12323 [06:50<21:11:53,  6.23s/it, v_num=i2o7, train/loss=9.6"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   1%| | 67/12323 [06:59<21:18:30,  6.26s/it, v_num=i2o7, train/loss=9.6\r",
+      "Epoch 0:   1%| | 67/12323 [06:59<21:18:30,  6.26s/it, v_num=i2o7, train/loss=9.0"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   1%| | 68/12323 [07:01<21:06:36,  6.20s/it, v_num=i2o7, train/loss=9.0\r",
+      "Epoch 0:   1%| | 68/12323 [07:01<21:06:36,  6.20s/it, v_num=i2o7, train/loss=10."
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   1%| | 69/12323 [07:05<20:57:59,  6.16s/it, v_num=i2o7, train/loss=10.\r",
+      "Epoch 0:   1%| | 69/12323 [07:05<20:58:00,  6.16s/it, v_num=i2o7, train/loss=9.8"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   1%| | 70/12323 [07:08<20:51:12,  6.13s/it, v_num=i2o7, train/loss=9.8\r",
+      "Epoch 0:   1%| | 70/12323 [07:08<20:51:12,  6.13s/it, v_num=i2o7, train/loss=9.5"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   1%| | 71/12323 [07:16<20:55:08,  6.15s/it, v_num=i2o7, train/loss=9.5\r",
+      "Epoch 0:   1%| | 71/12323 [07:16<20:55:09,  6.15s/it, v_num=i2o7, train/loss=9.0"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   1%| | 72/12323 [07:21<20:51:28,  6.13s/it, v_num=i2o7, train/loss=9.0\r",
+      "Epoch 0:   1%| | 72/12323 [07:21<20:51:28,  6.13s/it, v_num=i2o7, train/loss=9.5"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   1%| | 73/12323 [07:28<20:55:02,  6.15s/it, v_num=i2o7, train/loss=9.5\r",
+      "Epoch 0:   1%| | 73/12323 [07:28<20:55:02,  6.15s/it, v_num=i2o7, train/loss=9.1"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   1%| | 74/12323 [07:36<20:58:28,  6.16s/it, v_num=i2o7, train/loss=9.1\r",
+      "Epoch 0:   1%| | 74/12323 [07:36<20:58:28,  6.16s/it, v_num=i2o7, train/loss=9.1"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   1%| | 75/12323 [07:38<20:46:34,  6.11s/it, v_num=i2o7, train/loss=9.1\r",
+      "Epoch 0:   1%| | 75/12323 [07:38<20:46:34,  6.11s/it, v_num=i2o7, train/loss=0.2"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   1%| | 76/12323 [07:42<20:41:25,  6.08s/it, v_num=i2o7, train/loss=0.2\r",
+      "Epoch 0:   1%| | 76/12323 [07:42<20:41:25,  6.08s/it, v_num=i2o7, train/loss=9.4"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   1%| | 77/12323 [07:47<20:38:58,  6.07s/it, v_num=i2o7, train/loss=9.4\r",
+      "Epoch 0:   1%| | 77/12323 [07:47<20:38:58,  6.07s/it, v_num=i2o7, train/loss=9.3"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   1%| | 78/12323 [07:50<20:31:25,  6.03s/it, v_num=i2o7, train/loss=9.3\r",
+      "Epoch 0:   1%| | 78/12323 [07:50<20:31:25,  6.03s/it, v_num=i2o7, train/loss=9.9"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   1%| | 79/12323 [07:55<20:27:49,  6.02s/it, v_num=i2o7, train/loss=9.9\r",
+      "Epoch 0:   1%| | 79/12323 [07:55<20:27:49,  6.02s/it, v_num=i2o7, train/loss=9.4"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Epoch 0:   1%| | 80/12323 [08:00<20:25:48,  6.01s/it, v_num=i2o7, train/loss=9.4\r",
+      "Epoch 0:   1%| | 80/12323 [08:00<20:25:48,  6.01s/it, v_num=i2o7, train/loss=9.1"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 685 max words, 50 samples - at ../dataset/gen-word-685-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 81/12323 [08:02<20:16:06,  5.96s/it, v_num=i2o7, train/loss=9.1\r",
+      "Epoch 0:   1%| | 81/12323 [08:02<20:16:06,  5.96s/it, v_num=i2o7, train/loss=10."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 765 max words - at ../dataset/shuffle-word-765-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 82/12323 [08:11<20:21:40,  5.99s/it, v_num=i2o7, train/loss=10.\r",
+      "Epoch 0:   1%| | 82/12323 [08:11<20:21:40,  5.99s/it, v_num=i2o7, train/loss=9.0"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 660 max words - at ../dataset/shuffle-word-660-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 83/12323 [08:15<20:17:08,  5.97s/it, v_num=i2o7, train/loss=9.0\r",
+      "Epoch 0:   1%| | 83/12323 [08:15<20:17:09,  5.97s/it, v_num=i2o7, train/loss=9.5"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 11 samples (1 token repeat) - 210 max words - at ../dataset/shuffle-word-210-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 84/12323 [08:17<20:07:57,  5.92s/it, v_num=i2o7, train/loss=9.5\r",
+      "Epoch 0:   1%| | 84/12323 [08:17<20:07:58,  5.92s/it, v_num=i2o7, train/loss=10."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 530 max words - at ../dataset/shuffle-word-530-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 85/12323 [08:24<20:11:13,  5.94s/it, v_num=i2o7, train/loss=10.\r",
+      "Epoch 0:   1%| | 85/12323 [08:24<20:11:13,  5.94s/it, v_num=i2o7, train/loss=9.0"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 235 max words, 50 samples - at ../dataset/gen-word-235-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 86/12323 [08:29<20:08:20,  5.92s/it, v_num=i2o7, train/loss=9.0\r",
+      "Epoch 0:   1%| | 86/12323 [08:29<20:08:20,  5.92s/it, v_num=i2o7, train/loss=9.3"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 955 max words, 50 samples - at ../dataset/gen-word-955-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 87/12323 [08:34<20:06:37,  5.92s/it, v_num=i2o7, train/loss=9.3\r",
+      "Epoch 0:   1%| | 87/12323 [08:34<20:06:37,  5.92s/it, v_num=i2o7, train/loss=9.3"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 650 max words - at ../dataset/shuffle-word-650-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 88/12323 [08:38<20:01:37,  5.89s/it, v_num=i2o7, train/loss=9.3\r",
+      "Epoch 0:   1%| | 88/12323 [08:38<20:01:37,  5.89s/it, v_num=i2o7, train/loss=9.6"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 265 max words, 50 samples - at ../dataset/gen-word-265-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 89/12323 [08:40<19:52:03,  5.85s/it, v_num=i2o7, train/loss=9.6\r",
+      "Epoch 0:   1%| | 89/12323 [08:40<19:52:03,  5.85s/it, v_num=i2o7, train/loss=9.9"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 14 samples (1 token repeat) - 185 max words - at ../dataset/shuffle-word-185-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 90/12323 [08:47<19:55:15,  5.86s/it, v_num=i2o7, train/loss=9.9\r",
+      "Epoch 0:   1%| | 90/12323 [08:47<19:55:15,  5.86s/it, v_num=i2o7, train/loss=9.1"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 285 max words - at ../dataset/shuffle-word-285-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 91/12323 [08:52<19:53:54,  5.86s/it, v_num=i2o7, train/loss=9.1\r",
+      "Epoch 0:   1%| | 91/12323 [08:52<19:53:55,  5.86s/it, v_num=i2o7, train/loss=9.3"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 745 max words - at ../dataset/shuffle-word-745-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 92/12323 [08:57<19:51:24,  5.84s/it, v_num=i2o7, train/loss=9.3\r",
+      "Epoch 0:   1%| | 92/12323 [08:57<19:51:24,  5.84s/it, v_num=i2o7, train/loss=9.4"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 940 max words, 50 samples - at ../dataset/gen-word-940-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 93/12323 [09:02<19:48:36,  5.83s/it, v_num=i2o7, train/loss=9.4\r",
+      "Epoch 0:   1%| | 93/12323 [09:02<19:48:36,  5.83s/it, v_num=i2o7, train/loss=9.8"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1500 max words - at ../dataset/shuffle-word-1500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 94/12323 [09:07<19:46:05,  5.82s/it, v_num=i2o7, train/loss=9.8\r",
+      "Epoch 0:   1%| | 94/12323 [09:07<19:46:05,  5.82s/it, v_num=i2o7, train/loss=9.3"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 485 max words, 50 samples - at ../dataset/gen-word-485-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 95/12323 [09:12<19:44:46,  5.81s/it, v_num=i2o7, train/loss=9.3\r",
+      "Epoch 0:   1%| | 95/12323 [09:12<19:44:46,  5.81s/it, v_num=i2o7, train/loss=9.2"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 615 max words - at ../dataset/shuffle-word-615-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 96/12323 [10:00<21:13:50,  6.25s/it, v_num=i2o7, train/loss=9.2\r",
+      "Epoch 0:   1%| | 96/12323 [10:00<21:13:50,  6.25s/it, v_num=i2o7, train/loss=9.1"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 410 max words - at ../dataset/shuffle-word-410-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 97/12323 [10:08<21:18:08,  6.27s/it, v_num=i2o7, train/loss=9.1\r",
+      "Epoch 0:   1%| | 97/12323 [10:08<21:18:08,  6.27s/it, v_num=i2o7, train/loss=7.7"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 500 max words, 50 samples - at ../dataset/gen-word-500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 98/12323 [10:16<21:22:27,  6.29s/it, v_num=i2o7, train/loss=7.7\r",
+      "Epoch 0:   1%| | 98/12323 [10:16<21:22:27,  6.29s/it, v_num=i2o7, train/loss=7.8"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 375 max words - at ../dataset/shuffle-word-375-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 99/12323 [10:22<21:20:14,  6.28s/it, v_num=i2o7, train/loss=7.8\r",
+      "Epoch 0:   1%| | 99/12323 [10:22<21:20:14,  6.28s/it, v_num=i2o7, train/loss=7.8"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 880 max words - at ../dataset/shuffle-word-880-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 100/12323 [10:28<21:20:18,  6.28s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 100/12323 [10:28<21:20:19,  6.28s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 490 max words - at ../dataset/shuffle-word-490-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 101/12323 [10:36<21:24:32,  6.31s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 101/12323 [10:36<21:24:32,  6.31s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 680 max words, 50 samples - at ../dataset/gen-word-680-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 102/12323 [10:38<21:15:23,  6.26s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 102/12323 [10:38<21:15:23,  6.26s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 335 max words, 50 samples - at ../dataset/gen-word-335-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 103/12323 [10:42<21:10:30,  6.24s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 103/12323 [10:42<21:10:30,  6.24s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 10 samples (1 token repeat) - 225 max words - at ../dataset/shuffle-word-225-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 104/12323 [10:51<21:14:50,  6.26s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 104/12323 [10:51<21:14:50,  6.26s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 750 max words - at ../dataset/shuffle-word-750-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 105/12323 [10:53<21:08:02,  6.23s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 105/12323 [10:53<21:08:02,  6.23s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 815 max words - at ../dataset/shuffle-word-815-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 106/12323 [10:55<20:59:24,  6.19s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 106/12323 [10:55<20:59:24,  6.19s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 595 max words - at ../dataset/shuffle-word-595-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 107/12323 [11:03<21:03:19,  6.20s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 107/12323 [11:03<21:03:19,  6.20s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 315 max words - at ../dataset/shuffle-word-315-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 108/12323 [11:05<20:53:52,  6.16s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 108/12323 [11:05<20:53:52,  6.16s/it, v_num=i2o7, train/loss=0."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 605 max words - at ../dataset/shuffle-word-605-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 109/12323 [11:12<20:55:30,  6.17s/it, v_num=i2o7, train/loss=0.\r",
+      "Epoch 0:   1%| | 109/12323 [11:12<20:55:30,  6.17s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 330 max words - at ../dataset/shuffle-word-330-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 110/12323 [11:13<20:47:11,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 110/12323 [11:13<20:47:12,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 795 max words - at ../dataset/shuffle-word-795-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 111/12323 [11:19<20:45:29,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 111/12323 [11:19<20:45:29,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 615 max words, 50 samples - at ../dataset/gen-word-615-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 112/12323 [11:26<20:47:24,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 112/12323 [11:26<20:47:24,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 565 max words, 50 samples - at ../dataset/gen-word-565-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 113/12323 [11:29<20:41:19,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 113/12323 [11:29<20:41:19,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 935 max words - at ../dataset/shuffle-word-935-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 114/12323 [11:32<20:36:07,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 114/12323 [11:32<20:36:07,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 355 max words - at ../dataset/shuffle-word-355-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 115/12323 [11:33<20:27:33,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 115/12323 [11:33<20:27:33,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 345 max words, 50 samples - at ../dataset/gen-word-345-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 116/12323 [11:38<20:24:25,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 116/12323 [11:38<20:24:25,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 280 max words, 50 samples - at ../dataset/gen-word-280-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 117/12323 [11:41<20:19:32,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 117/12323 [11:41<20:19:32,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 995 max words - at ../dataset/shuffle-word-995-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 118/12323 [11:45<20:15:34,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 118/12323 [11:45<20:15:34,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 14 samples (1 token repeat) - 180 max words - at ../dataset/shuffle-word-180-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 119/12323 [11:48<20:10:49,  5.95s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 119/12323 [11:48<20:10:49,  5.95s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 785 max words - at ../dataset/shuffle-word-785-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 120/12323 [11:55<20:13:03,  5.96s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 120/12323 [11:55<20:13:04,  5.96s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 10 samples (1 token repeat) - 220 max words - at ../dataset/shuffle-word-220-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 121/12323 [12:03<20:15:15,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 121/12323 [12:03<20:15:15,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 425 max words - at ../dataset/shuffle-word-425-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 122/12323 [12:09<20:15:47,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 122/12323 [12:09<20:15:47,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 470 max words, 50 samples - at ../dataset/gen-word-470-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 123/12323 [12:11<20:08:48,  5.94s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 123/12323 [12:11<20:08:48,  5.94s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 905 max words, 50 samples - at ../dataset/gen-word-905-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 124/12323 [12:18<20:11:07,  5.96s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 124/12323 [12:18<20:11:07,  5.96s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 580 max words - at ../dataset/shuffle-word-580-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 125/12323 [12:22<20:06:47,  5.94s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 125/12323 [12:22<20:06:47,  5.94s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 845 max words - at ../dataset/shuffle-word-845-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 126/12323 [12:28<20:07:38,  5.94s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 126/12323 [12:28<20:07:38,  5.94s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 450 max words, 50 samples - at ../dataset/gen-word-450-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 127/12323 [12:29<20:00:04,  5.90s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 127/12323 [12:29<20:00:04,  5.90s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 14 samples (1 token repeat) - 190 max words - at ../dataset/shuffle-word-190-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 128/12323 [13:14<21:01:41,  6.21s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 128/12323 [13:14<21:01:41,  6.21s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5100 max words - at ../dataset/shuffle-word-5100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 129/12323 [13:20<21:01:51,  6.21s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 129/12323 [13:20<21:01:51,  6.21s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 10 samples (1 token repeat) - 240 max words - at ../dataset/shuffle-word-240-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 130/12323 [13:24<20:57:13,  6.19s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 130/12323 [13:24<20:57:13,  6.19s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 885 max words - at ../dataset/shuffle-word-885-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 131/12323 [13:28<20:53:26,  6.17s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 131/12323 [13:28<20:53:26,  6.17s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 325 max words, 50 samples - at ../dataset/gen-word-325-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 132/12323 [13:34<20:53:32,  6.17s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 132/12323 [13:34<20:53:32,  6.17s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 900 max words, 50 samples - at ../dataset/gen-word-900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 133/12323 [13:41<20:55:28,  6.18s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 133/12323 [13:41<20:55:28,  6.18s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 945 max words, 50 samples - at ../dataset/gen-word-945-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 134/12323 [13:48<20:55:44,  6.18s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 134/12323 [13:48<20:55:44,  6.18s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 445 max words - at ../dataset/shuffle-word-445-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 135/12323 [13:52<20:52:04,  6.16s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 135/12323 [13:52<20:52:04,  6.16s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 950 max words, 50 samples - at ../dataset/gen-word-950-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 136/12323 [14:00<20:55:17,  6.18s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 136/12323 [14:00<20:55:17,  6.18s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 330 max words, 50 samples - at ../dataset/gen-word-330-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 137/12323 [14:05<20:54:01,  6.17s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 137/12323 [14:05<20:54:01,  6.17s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 960 max words, 50 samples - at ../dataset/gen-word-960-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 138/12323 [14:09<20:50:30,  6.16s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 138/12323 [14:09<20:50:30,  6.16s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 955 max words - at ../dataset/shuffle-word-955-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 139/12323 [14:13<20:46:59,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 139/12323 [14:13<20:46:59,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 10 samples (1 token repeat) - 245 max words - at ../dataset/shuffle-word-245-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 140/12323 [14:14<20:39:49,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 140/12323 [14:14<20:39:49,  6.11s/it, v_num=i2o7, train/loss=6."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 800 max words - at ../dataset/shuffle-word-800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 141/12323 [14:19<20:37:54,  6.10s/it, v_num=i2o7, train/loss=6.\r",
+      "Epoch 0:   1%| | 141/12323 [14:19<20:37:54,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 700 max words - at ../dataset/shuffle-word-700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 142/12323 [14:21<20:31:41,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 142/12323 [14:21<20:31:41,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 295 max words, 50 samples - at ../dataset/gen-word-295-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 143/12323 [14:24<20:26:59,  6.04s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 143/12323 [14:24<20:26:59,  6.04s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 13 samples (1 token repeat) - 195 max words - at ../dataset/shuffle-word-195-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 144/12323 [14:31<20:28:49,  6.05s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 144/12323 [14:31<20:28:49,  6.05s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 810 max words - at ../dataset/shuffle-word-810-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 145/12323 [14:36<20:26:18,  6.04s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 145/12323 [14:36<20:26:18,  6.04s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 195 max words, 50 samples - at ../dataset/gen-word-195-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 146/12323 [14:38<20:21:41,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 146/12323 [14:38<20:21:41,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 545 max words - at ../dataset/shuffle-word-545-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 147/12323 [14:40<20:15:43,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 147/12323 [14:40<20:15:43,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1700 max words - at ../dataset/shuffle-word-1700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 148/12323 [14:49<20:19:07,  6.01s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 148/12323 [14:49<20:19:07,  6.01s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 170 max words, 50 samples - at ../dataset/gen-word-170-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 149/12323 [14:54<20:18:12,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 149/12323 [14:54<20:18:12,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 630 max words, 50 samples - at ../dataset/gen-word-630-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 150/12323 [14:57<20:13:48,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 150/12323 [14:57<20:13:48,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 790 max words, 50 samples - at ../dataset/gen-word-790-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 151/12323 [14:59<20:08:44,  5.96s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 151/12323 [14:59<20:08:44,  5.96s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 805 max words - at ../dataset/shuffle-word-805-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 152/12323 [15:03<20:05:47,  5.94s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 152/12323 [15:03<20:05:47,  5.94s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 380 max words, 50 samples - at ../dataset/gen-word-380-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 153/12323 [15:10<20:07:42,  5.95s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 153/12323 [15:10<20:07:42,  5.95s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 895 max words, 50 samples - at ../dataset/gen-word-895-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 154/12323 [15:13<20:03:28,  5.93s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 154/12323 [15:13<20:03:28,  5.93s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 940 max words - at ../dataset/shuffle-word-940-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 155/12323 [15:20<20:03:58,  5.94s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 155/12323 [15:20<20:03:58,  5.94s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 875 max words - at ../dataset/shuffle-word-875-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 156/12323 [15:23<20:00:28,  5.92s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 156/12323 [15:23<20:00:28,  5.92s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 51 samples (20 token repeat) - 1300 max words - at ../dataset/shuffle-word-1300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 157/12323 [15:30<20:02:20,  5.93s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 157/12323 [15:30<20:02:20,  5.93s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 1000 max words - at ../dataset/shuffle-word-1000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 158/12323 [15:38<20:04:05,  5.94s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 158/12323 [15:38<20:04:05,  5.94s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 245 max words, 50 samples - at ../dataset/gen-word-245-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 159/12323 [15:46<20:07:06,  5.95s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 159/12323 [15:46<20:07:07,  5.95s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 325 max words - at ../dataset/shuffle-word-325-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 160/12323 [16:29<20:53:35,  6.18s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 160/12323 [16:29<20:53:35,  6.18s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 375 max words, 50 samples - at ../dataset/gen-word-375-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 161/12323 [16:34<20:52:27,  6.18s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 161/12323 [16:34<20:52:27,  6.18s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 650 max words, 50 samples - at ../dataset/gen-word-650-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 162/12323 [16:39<20:49:59,  6.17s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 162/12323 [16:39<20:49:59,  6.17s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 905 max words - at ../dataset/shuffle-word-905-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 163/12323 [16:42<20:46:16,  6.15s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 163/12323 [16:42<20:46:16,  6.15s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 760 max words - at ../dataset/shuffle-word-760-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 164/12323 [16:45<20:41:56,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 164/12323 [16:45<20:41:56,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 455 max words, 50 samples - at ../dataset/gen-word-455-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 165/12323 [16:48<20:38:20,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 165/12323 [16:48<20:38:20,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 850 max words, 50 samples - at ../dataset/gen-word-850-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 166/12323 [16:51<20:34:45,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 166/12323 [16:51<20:34:45,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 360 max words - at ../dataset/shuffle-word-360-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 167/12323 [16:56<20:33:39,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 167/12323 [16:56<20:33:39,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 530 max words, 50 samples - at ../dataset/gen-word-530-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 168/12323 [16:58<20:27:44,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 168/12323 [16:58<20:27:44,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 440 max words, 50 samples - at ../dataset/gen-word-440-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 169/12323 [16:59<20:21:53,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 169/12323 [16:59<20:21:53,  6.03s/it, v_num=i2o7, train/loss=6."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 510 max words, 50 samples - at ../dataset/gen-word-510-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 170/12323 [17:02<20:17:54,  6.01s/it, v_num=i2o7, train/loss=6.\r",
+      "Epoch 0:   1%| | 170/12323 [17:02<20:17:54,  6.01s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1000 max words, 50 samples - at ../dataset/gen-word-1000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 171/12323 [17:06<20:15:47,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 171/12323 [17:06<20:15:47,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 775 max words - at ../dataset/shuffle-word-775-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 172/12323 [17:10<20:13:44,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 172/12323 [17:10<20:13:44,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 970 max words - at ../dataset/shuffle-word-970-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 173/12323 [17:13<20:09:54,  5.97s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 173/12323 [17:13<20:09:54,  5.97s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3600 max words - at ../dataset/shuffle-word-3600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 174/12323 [17:21<20:11:27,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 174/12323 [17:21<20:11:27,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 225 max words, 50 samples - at ../dataset/gen-word-225-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 175/12323 [17:29<20:14:16,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 175/12323 [17:29<20:14:16,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 805 max words, 50 samples - at ../dataset/gen-word-805-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 176/12323 [17:36<20:14:42,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 176/12323 [17:36<20:14:42,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 460 max words, 50 samples - at ../dataset/gen-word-460-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 177/12323 [17:40<20:12:42,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 177/12323 [17:40<20:12:42,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 685 max words - at ../dataset/shuffle-word-685-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 178/12323 [17:43<20:09:02,  5.97s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 178/12323 [17:43<20:09:02,  5.97s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 645 max words - at ../dataset/shuffle-word-645-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 179/12323 [17:49<20:09:25,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 179/12323 [17:49<20:09:25,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4400 max words - at ../dataset/shuffle-word-4400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 180/12323 [17:52<20:05:45,  5.96s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 180/12323 [17:52<20:05:45,  5.96s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 15 samples (1 token repeat) - 200 max words - at ../dataset/shuffle-word-200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 181/12323 [17:58<20:06:08,  5.96s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 181/12323 [17:58<20:06:08,  5.96s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 10 samples (1 token repeat) - 230 max words - at ../dataset/shuffle-word-230-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 182/12323 [18:06<20:07:38,  5.97s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 182/12323 [18:06<20:07:38,  5.97s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 900 max words - at ../dataset/shuffle-word-900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 183/12323 [18:09<20:04:35,  5.95s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 183/12323 [18:09<20:04:35,  5.95s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 835 max words, 50 samples - at ../dataset/gen-word-835-count.jsonl\n"
+      "\r",
+      "Epoch 0:   1%| | 184/12323 [18:12<20:01:34,  5.94s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 184/12323 [18:12<20:01:34,  5.94s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 705 max words - at ../dataset/shuffle-word-705-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 185/12323 [18:21<20:04:11,  5.95s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 185/12323 [18:21<20:04:11,  5.95s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 290 max words - at ../dataset/shuffle-word-290-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 186/12323 [18:28<20:05:39,  5.96s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 186/12323 [18:28<20:05:39,  5.96s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 315 max words, 50 samples - at ../dataset/gen-word-315-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 187/12323 [18:36<20:07:07,  5.97s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 187/12323 [18:36<20:07:07,  5.97s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 500 max words - at ../dataset/shuffle-word-500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 188/12323 [18:42<20:07:30,  5.97s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 188/12323 [18:42<20:07:30,  5.97s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 840 max words - at ../dataset/shuffle-word-840-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 189/12323 [18:50<20:10:04,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 189/12323 [18:50<20:10:04,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 360 max words, 50 samples - at ../dataset/gen-word-360-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 190/12323 [18:56<20:09:18,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 190/12323 [18:56<20:09:18,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 760 max words, 50 samples - at ../dataset/gen-word-760-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 191/12323 [19:03<20:10:47,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 191/12323 [19:03<20:10:47,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 910 max words, 50 samples - at ../dataset/gen-word-910-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 192/12323 [19:41<20:44:26,  6.16s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 192/12323 [19:41<20:44:26,  6.16s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 580 max words, 50 samples - at ../dataset/gen-word-580-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 193/12323 [19:47<20:43:32,  6.15s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 193/12323 [19:47<20:43:32,  6.15s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 2900 max words - at ../dataset/shuffle-word-2900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 194/12323 [19:49<20:39:54,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 194/12323 [19:49<20:39:54,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 990 max words - at ../dataset/shuffle-word-990-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 195/12323 [19:51<20:35:21,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 195/12323 [19:51<20:35:21,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 795 max words, 50 samples - at ../dataset/gen-word-795-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 196/12323 [19:56<20:33:24,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 196/12323 [19:56<20:33:24,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 570 max words, 50 samples - at ../dataset/gen-word-570-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 197/12323 [20:02<20:33:36,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 197/12323 [20:02<20:33:36,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 895 max words - at ../dataset/shuffle-word-895-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 198/12323 [20:08<20:33:45,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 198/12323 [20:08<20:33:45,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 935 max words, 50 samples - at ../dataset/gen-word-935-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 199/12323 [20:12<20:30:49,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 199/12323 [20:12<20:30:49,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 8 samples (1 token repeat) - 390 max words - at ../dataset/shuffle-word-390-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 200/12323 [20:15<20:27:55,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 200/12323 [20:15<20:27:55,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 620 max words - at ../dataset/shuffle-word-620-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 201/12323 [20:20<20:27:03,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 201/12323 [20:20<20:27:03,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 845 max words, 50 samples - at ../dataset/gen-word-845-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 202/12323 [20:24<20:24:09,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 202/12323 [20:24<20:24:10,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7600 max words - at ../dataset/shuffle-word-7600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 203/12323 [20:28<20:22:49,  6.05s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 203/12323 [20:28<20:22:49,  6.05s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 735 max words, 50 samples - at ../dataset/gen-word-735-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 204/12323 [20:35<20:23:05,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 204/12323 [20:35<20:23:05,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 655 max words, 50 samples - at ../dataset/gen-word-655-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 205/12323 [20:39<20:20:46,  6.04s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 205/12323 [20:39<20:20:46,  6.04s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4100 max words - at ../dataset/shuffle-word-4100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 206/12323 [20:42<20:18:30,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 206/12323 [20:42<20:18:30,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 715 max words, 50 samples - at ../dataset/gen-word-715-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 207/12323 [20:45<20:15:13,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 207/12323 [20:45<20:15:13,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 480 max words - at ../dataset/shuffle-word-480-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 208/12323 [20:51<20:14:28,  6.01s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 208/12323 [20:51<20:14:28,  6.01s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 365 max words, 50 samples - at ../dataset/gen-word-365-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 209/12323 [20:59<20:16:41,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 209/12323 [20:59<20:16:41,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 460 max words - at ../dataset/shuffle-word-460-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 210/12323 [21:03<20:14:55,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 210/12323 [21:03<20:14:55,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 505 max words, 50 samples - at ../dataset/gen-word-505-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 211/12323 [21:12<20:17:02,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 970 max words, 50 samples - at ../dataset/gen-word-970-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 211/12323 [21:12<20:17:02,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 610 max words - at ../dataset/shuffle-word-610-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 212/12323 [21:18<20:17:09,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 212/12323 [21:18<20:17:09,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 575 max words, 50 samples - at ../dataset/gen-word-575-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 213/12323 [21:23<20:16:17,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 213/12323 [21:23<20:16:17,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 740 max words - at ../dataset/shuffle-word-740-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 214/12323 [21:24<20:11:40,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 214/12323 [21:24<20:11:40,  6.00s/it, v_num=i2o7, train/loss=0."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 675 max words - at ../dataset/shuffle-word-675-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 215/12323 [21:28<20:09:27,  5.99s/it, v_num=i2o7, train/loss=0.\r",
+      "Epoch 0:   2%| | 215/12323 [21:28<20:09:27,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 885 max words, 50 samples - at ../dataset/gen-word-885-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 216/12323 [21:34<20:09:35,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 216/12323 [21:34<20:09:35,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 690 max words, 50 samples - at ../dataset/gen-word-690-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 217/12323 [21:39<20:08:19,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 217/12323 [21:39<20:08:19,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 585 max words, 50 samples - at ../dataset/gen-word-585-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 218/12323 [21:43<20:06:36,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 218/12323 [21:43<20:06:36,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 465 max words, 50 samples - at ../dataset/gen-word-465-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 219/12323 [21:47<20:04:28,  5.97s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 219/12323 [21:47<20:04:28,  5.97s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 590 max words - at ../dataset/shuffle-word-590-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 220/12323 [21:51<20:02:18,  5.96s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 220/12323 [21:51<20:02:18,  5.96s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3800 max words - at ../dataset/shuffle-word-3800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 221/12323 [21:57<20:02:29,  5.96s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 221/12323 [21:57<20:02:29,  5.96s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 550 max words, 50 samples - at ../dataset/gen-word-550-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 222/12323 [22:03<20:02:39,  5.96s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 222/12323 [22:03<20:02:39,  5.96s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 980 max words, 50 samples - at ../dataset/gen-word-980-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 223/12323 [22:06<19:59:37,  5.95s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 223/12323 [22:06<19:59:37,  5.95s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 600 max words - at ../dataset/shuffle-word-600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 224/12323 [22:45<20:29:31,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 224/12323 [22:45<20:29:31,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 200 max words, 50 samples - at ../dataset/gen-word-200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 225/12323 [22:49<20:26:52,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 225/12323 [22:49<20:26:52,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 870 max words, 50 samples - at ../dataset/gen-word-870-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 226/12323 [22:53<20:25:37,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 226/12323 [22:53<20:25:37,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5000 max words - at ../dataset/shuffle-word-5000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 227/12323 [22:59<20:24:48,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 227/12323 [22:59<20:24:48,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 835 max words - at ../dataset/shuffle-word-835-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 228/12323 [23:00<20:20:56,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 228/12323 [23:00<20:20:56,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 8 samples (1 token repeat) - 310 max words - at ../dataset/shuffle-word-310-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 229/12323 [23:07<20:21:07,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 229/12323 [23:07<20:21:07,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 340 max words - at ../dataset/shuffle-word-340-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 230/12323 [23:14<20:22:09,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 230/12323 [23:14<20:22:09,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 475 max words, 50 samples - at ../dataset/gen-word-475-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 231/12323 [23:17<20:19:10,  6.05s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 231/12323 [23:17<20:19:10,  6.05s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 540 max words - at ../dataset/shuffle-word-540-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 232/12323 [23:25<20:20:59,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 232/12323 [23:25<20:20:59,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 395 max words - at ../dataset/shuffle-word-395-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 233/12323 [23:34<20:22:51,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 233/12323 [23:34<20:22:51,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 520 max words, 50 samples - at ../dataset/gen-word-520-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 234/12323 [23:35<20:18:36,  6.05s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 234/12323 [23:35<20:18:36,  6.05s/it, v_num=i2o7, train/loss=6."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 985 max words - at ../dataset/shuffle-word-985-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 235/12323 [23:42<20:19:33,  6.05s/it, v_num=i2o7, train/loss=6.\r",
+      "Epoch 0:   2%| | 235/12323 [23:42<20:19:33,  6.05s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 740 max words, 50 samples - at ../dataset/gen-word-740-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 236/12323 [23:44<20:16:12,  6.04s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 236/12323 [23:44<20:16:12,  6.04s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6900 max words - at ../dataset/shuffle-word-6900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 237/12323 [23:51<20:16:20,  6.04s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 237/12323 [23:51<20:16:20,  6.04s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 10 samples (1 token repeat) - 235 max words - at ../dataset/shuffle-word-235-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 238/12323 [23:55<20:15:12,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 238/12323 [23:55<20:15:12,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 980 max words - at ../dataset/shuffle-word-980-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 239/12323 [24:00<20:13:35,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 239/12323 [24:00<20:13:35,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 505 max words - at ../dataset/shuffle-word-505-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 240/12323 [24:05<20:12:53,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 240/12323 [24:05<20:12:54,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 830 max words, 50 samples - at ../dataset/gen-word-830-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 241/12323 [24:07<20:09:14,  6.01s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 241/12323 [24:07<20:09:14,  6.01s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 945 max words - at ../dataset/shuffle-word-945-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 242/12323 [24:15<20:11:06,  6.01s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 242/12323 [24:15<20:11:06,  6.01s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4700 max words - at ../dataset/shuffle-word-4700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 243/12323 [24:20<20:09:40,  6.01s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 243/12323 [24:20<20:09:40,  6.01s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 400 max words - at ../dataset/shuffle-word-400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 244/12323 [24:28<20:11:33,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 244/12323 [24:28<20:11:33,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 600 max words, 50 samples - at ../dataset/gen-word-600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 245/12323 [24:32<20:09:40,  6.01s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 245/12323 [24:32<20:09:40,  6.01s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3200 max words - at ../dataset/shuffle-word-3200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 246/12323 [24:38<20:09:51,  6.01s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 246/12323 [24:38<20:09:51,  6.01s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 620 max words, 50 samples - at ../dataset/gen-word-620-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 247/12323 [24:41<20:07:33,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 247/12323 [24:41<20:07:33,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 385 max words - at ../dataset/shuffle-word-385-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 248/12323 [24:48<20:07:48,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 248/12323 [24:48<20:07:48,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1800 max words - at ../dataset/shuffle-word-1800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 249/12323 [24:54<20:08:05,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 249/12323 [24:54<20:08:05,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 435 max words - at ../dataset/shuffle-word-435-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 250/12323 [25:00<20:07:33,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 250/12323 [25:00<20:07:33,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 605 max words, 50 samples - at ../dataset/gen-word-605-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 251/12323 [25:06<20:07:46,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 251/12323 [25:06<20:07:46,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 930 max words, 50 samples - at ../dataset/gen-word-930-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 252/12323 [25:08<20:03:55,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 252/12323 [25:08<20:03:55,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 465 max words - at ../dataset/shuffle-word-465-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 253/12323 [25:15<20:04:56,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 253/12323 [25:15<20:04:56,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5800 max words - at ../dataset/shuffle-word-5800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 254/12323 [25:19<20:03:29,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 254/12323 [25:19<20:03:29,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 385 max words, 50 samples - at ../dataset/gen-word-385-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 255/12323 [25:24<20:02:25,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 255/12323 [25:24<20:02:25,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 575 max words - at ../dataset/shuffle-word-575-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 256/12323 [25:58<20:24:35,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 256/12323 [25:58<20:24:35,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 415 max words, 50 samples - at ../dataset/gen-word-415-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 257/12323 [26:01<20:21:30,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 257/12323 [26:01<20:21:30,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 370 max words, 50 samples - at ../dataset/gen-word-370-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 258/12323 [26:07<20:21:37,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 258/12323 [26:07<20:21:37,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 290 max words, 50 samples - at ../dataset/gen-word-290-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 259/12323 [26:10<20:19:21,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 259/12323 [26:10<20:19:21,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1600 max words - at ../dataset/shuffle-word-1600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 260/12323 [26:18<20:20:15,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 260/12323 [26:18<20:20:15,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 510 max words - at ../dataset/shuffle-word-510-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 261/12323 [26:20<20:17:36,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 261/12323 [26:20<20:17:36,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 60 samples (20 token repeat) - 1200 max words - at ../dataset/shuffle-word-1200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 262/12323 [26:27<20:17:42,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 262/12323 [26:27<20:17:42,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 10 samples (1 token repeat) - 300 max words - at ../dataset/shuffle-word-300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 263/12323 [26:31<20:16:34,  6.05s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 263/12323 [26:31<20:16:34,  6.05s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 965 max words - at ../dataset/shuffle-word-965-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 264/12323 [26:35<20:14:19,  6.04s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 264/12323 [26:35<20:14:19,  6.04s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 590 max words, 50 samples - at ../dataset/gen-word-590-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 265/12323 [26:39<20:13:15,  6.04s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 265/12323 [26:39<20:13:15,  6.04s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 2400 max words - at ../dataset/shuffle-word-2400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 266/12323 [26:44<20:11:48,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 266/12323 [26:44<20:11:48,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 17 samples (1 token repeat) - 140 max words - at ../dataset/shuffle-word-140-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 267/12323 [26:48<20:10:23,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 267/12323 [26:48<20:10:23,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 430 max words, 50 samples - at ../dataset/gen-word-430-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 268/12323 [26:57<20:12:30,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 268/12323 [26:57<20:12:30,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 350 max words - at ../dataset/shuffle-word-350-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 269/12323 [27:00<20:10:21,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 269/12323 [27:00<20:10:21,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 625 max words, 50 samples - at ../dataset/gen-word-625-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 270/12323 [27:07<20:11:12,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 270/12323 [27:07<20:11:12,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3900 max words - at ../dataset/shuffle-word-3900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 271/12323 [27:13<20:10:39,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 271/12323 [27:13<20:10:39,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 630 max words - at ../dataset/shuffle-word-630-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 272/12323 [27:18<20:09:41,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 272/12323 [27:18<20:09:41,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 560 max words, 50 samples - at ../dataset/gen-word-560-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 273/12323 [27:24<20:09:52,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 273/12323 [27:24<20:09:52,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 790 max words - at ../dataset/shuffle-word-790-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 274/12323 [27:27<20:07:45,  6.01s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 274/12323 [27:27<20:07:45,  6.01s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 640 max words, 50 samples - at ../dataset/gen-word-640-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 275/12323 [27:31<20:05:41,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 275/12323 [27:31<20:05:41,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 320 max words - at ../dataset/shuffle-word-320-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 276/12323 [27:39<20:07:26,  6.01s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 276/12323 [27:39<20:07:26,  6.01s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 875 max words, 50 samples - at ../dataset/gen-word-875-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 277/12323 [27:42<20:04:41,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 277/12323 [27:42<20:04:41,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 810 max words, 50 samples - at ../dataset/gen-word-810-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 278/12323 [27:47<20:03:47,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 278/12323 [27:47<20:03:47,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 910 max words - at ../dataset/shuffle-word-910-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 279/12323 [27:52<20:03:13,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 279/12323 [27:52<20:03:13,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 880 max words, 50 samples - at ../dataset/gen-word-880-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 280/12323 [28:00<20:04:52,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 280/12323 [28:00<20:04:52,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3100 max words - at ../dataset/shuffle-word-3100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 281/12323 [28:04<20:02:50,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 281/12323 [28:04<20:02:50,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 220 max words, 50 samples - at ../dataset/gen-word-220-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 282/12323 [28:10<20:03:04,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 282/12323 [28:10<20:03:04,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 915 max words - at ../dataset/shuffle-word-915-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 283/12323 [28:15<20:02:11,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 283/12323 [28:15<20:02:11,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 825 max words - at ../dataset/shuffle-word-825-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 284/12323 [28:21<20:02:25,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 284/12323 [28:21<20:02:25,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 720 max words - at ../dataset/shuffle-word-720-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 285/12323 [28:23<19:59:20,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 285/12323 [28:23<19:59:20,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 265 max words - at ../dataset/shuffle-word-265-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 286/12323 [28:26<19:57:00,  5.97s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 286/12323 [28:26<19:57:00,  5.97s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 890 max words - at ../dataset/shuffle-word-890-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 287/12323 [28:32<19:57:13,  5.97s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 287/12323 [28:32<19:57:13,  5.97s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 300 max words, 50 samples - at ../dataset/gen-word-300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 288/12323 [29:19<20:25:29,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 288/12323 [29:19<20:25:29,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3500 max words - at ../dataset/shuffle-word-3500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 289/12323 [29:22<20:23:04,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 289/12323 [29:22<20:23:04,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 925 max words - at ../dataset/shuffle-word-925-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 290/12323 [29:27<20:22:23,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 290/12323 [29:27<20:22:23,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 2100 max words - at ../dataset/shuffle-word-2100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 291/12323 [29:31<20:20:38,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 291/12323 [29:31<20:20:38,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 285 max words, 50 samples - at ../dataset/gen-word-285-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 292/12323 [29:34<20:18:37,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 292/12323 [29:34<20:18:37,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 38 samples (20 token repeat) - 2500 max words - at ../dataset/shuffle-word-2500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 293/12323 [29:39<20:17:37,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 293/12323 [29:39<20:17:37,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 695 max words - at ../dataset/shuffle-word-695-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 294/12323 [29:44<20:16:36,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 294/12323 [29:44<20:16:36,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 2200 max words - at ../dataset/shuffle-word-2200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 295/12323 [29:47<20:14:56,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 295/12323 [29:47<20:14:56,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 660 max words, 50 samples - at ../dataset/gen-word-660-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 296/12323 [29:50<20:12:36,  6.05s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 296/12323 [29:50<20:12:36,  6.05s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 800 max words, 50 samples - at ../dataset/gen-word-800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 297/12323 [29:55<20:12:00,  6.05s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 297/12323 [29:55<20:12:00,  6.05s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 990 max words, 50 samples - at ../dataset/gen-word-990-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 298/12323 [29:59<20:10:27,  6.04s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 298/12323 [29:59<20:10:27,  6.04s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 595 max words, 50 samples - at ../dataset/gen-word-595-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 299/12323 [30:04<20:09:31,  6.04s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 299/12323 [30:04<20:09:31,  6.04s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5600 max words - at ../dataset/shuffle-word-5600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 300/12323 [30:09<20:08:56,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 300/12323 [30:09<20:08:56,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 210 max words, 50 samples - at ../dataset/gen-word-210-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 301/12323 [30:13<20:07:20,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 301/12323 [30:13<20:07:20,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 425 max words, 50 samples - at ../dataset/gen-word-425-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 302/12323 [30:21<20:08:04,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 302/12323 [30:21<20:08:04,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 765 max words, 50 samples - at ../dataset/gen-word-765-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 303/12323 [30:24<20:06:08,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 303/12323 [30:24<20:06:08,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 680 max words - at ../dataset/shuffle-word-680-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 304/12323 [30:32<20:07:28,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 304/12323 [30:32<20:07:28,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 960 max words - at ../dataset/shuffle-word-960-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 305/12323 [30:36<20:06:12,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 305/12323 [30:36<20:06:12,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 670 max words, 50 samples - at ../dataset/gen-word-670-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 306/12323 [30:44<20:07:35,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 306/12323 [30:44<20:07:35,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 825 max words, 50 samples - at ../dataset/gen-word-825-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 307/12323 [30:51<20:07:40,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 307/12323 [30:51<20:07:40,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 975 max words - at ../dataset/shuffle-word-975-count.jsonl\n"
+      "\r",
+      "Epoch 0:   2%| | 308/12323 [30:58<20:08:19,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 308/12323 [30:58<20:08:19,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7100 max words - at ../dataset/shuffle-word-7100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 309/12323 [31:01<20:06:24,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 309/12323 [31:01<20:06:24,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 860 max words - at ../dataset/shuffle-word-860-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 310/12323 [31:07<20:06:27,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 310/12323 [31:07<20:06:27,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 715 max words - at ../dataset/shuffle-word-715-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 311/12323 [31:14<20:06:31,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 311/12323 [31:14<20:06:31,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7500 max words - at ../dataset/shuffle-word-7500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 312/12323 [31:16<20:03:59,  6.01s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 312/12323 [31:16<20:03:59,  6.01s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 515 max words, 50 samples - at ../dataset/gen-word-515-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 313/12323 [31:24<20:05:18,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 313/12323 [31:24<20:05:18,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6200 max words - at ../dataset/shuffle-word-6200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 314/12323 [31:30<20:04:43,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 314/12323 [31:30<20:04:43,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 185 max words, 50 samples - at ../dataset/gen-word-185-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 315/12323 [31:34<20:03:30,  6.01s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 315/12323 [31:34<20:03:30,  6.01s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 755 max words, 50 samples - at ../dataset/gen-word-755-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 316/12323 [31:35<20:00:41,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 316/12323 [31:35<20:00:41,  6.00s/it, v_num=i2o7, train/loss=4."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 540 max words, 50 samples - at ../dataset/gen-word-540-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 317/12323 [31:43<20:01:22,  6.00s/it, v_num=i2o7, train/loss=4.\r",
+      "Epoch 0:   3%| | 317/12323 [31:43<20:01:22,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 310 max words, 50 samples - at ../dataset/gen-word-310-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 318/12323 [31:46<19:59:32,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 318/12323 [31:46<19:59:32,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5500 max words - at ../dataset/shuffle-word-5500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 319/12323 [31:52<19:59:38,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 319/12323 [31:52<19:59:38,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 725 max words - at ../dataset/shuffle-word-725-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 320/12323 [32:44<20:27:51,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 320/12323 [32:44<20:27:51,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7000 max words - at ../dataset/shuffle-word-7000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 321/12323 [32:47<20:26:20,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 321/12323 [32:47<20:26:20,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4500 max words - at ../dataset/shuffle-word-4500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 322/12323 [32:54<20:26:25,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 322/12323 [32:54<20:26:25,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7400 max words - at ../dataset/shuffle-word-7400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 323/12323 [32:57<20:24:34,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 323/12323 [32:57<20:24:34,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 2000 max words - at ../dataset/shuffle-word-2000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 324/12323 [32:59<20:22:05,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 324/12323 [32:59<20:22:05,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 640 max words - at ../dataset/shuffle-word-640-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 325/12323 [33:01<20:19:01,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 325/12323 [33:01<20:19:01,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 705 max words, 50 samples - at ../dataset/gen-word-705-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 326/12323 [33:09<20:20:13,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 326/12323 [33:09<20:20:13,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 335 max words - at ../dataset/shuffle-word-335-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 327/12323 [33:14<20:19:36,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 327/12323 [33:14<20:19:36,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 525 max words, 50 samples - at ../dataset/gen-word-525-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 328/12323 [33:22<20:20:16,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 328/12323 [33:22<20:20:16,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 820 max words, 50 samples - at ../dataset/gen-word-820-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 329/12323 [33:29<20:20:52,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 329/12323 [33:29<20:20:52,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 305 max words - at ../dataset/shuffle-word-305-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 330/12323 [33:35<20:20:50,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 330/12323 [33:35<20:20:50,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 270 max words, 50 samples - at ../dataset/gen-word-270-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 331/12323 [33:42<20:21:25,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 331/12323 [33:42<20:21:25,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 830 max words - at ../dataset/shuffle-word-830-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 332/12323 [33:46<20:20:10,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 332/12323 [33:46<20:20:10,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6000 max words - at ../dataset/shuffle-word-6000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 333/12323 [33:52<20:19:31,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 333/12323 [33:52<20:19:31,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 920 max words, 50 samples - at ../dataset/gen-word-920-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 334/12323 [34:00<20:20:40,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 334/12323 [34:00<20:20:40,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 495 max words - at ../dataset/shuffle-word-495-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 335/12323 [34:04<20:19:09,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 335/12323 [34:04<20:19:09,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 770 max words, 50 samples - at ../dataset/gen-word-770-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 336/12323 [34:06<20:17:03,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 336/12323 [34:06<20:17:03,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4000 max words - at ../dataset/shuffle-word-4000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 337/12323 [34:15<20:18:13,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 337/12323 [34:15<20:18:13,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 670 max words - at ../dataset/shuffle-word-670-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 338/12323 [34:21<20:18:15,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 338/12323 [34:21<20:18:15,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 635 max words - at ../dataset/shuffle-word-635-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 339/12323 [34:27<20:18:17,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 339/12323 [34:27<20:18:17,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 725 max words, 50 samples - at ../dataset/gen-word-725-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 340/12323 [34:32<20:17:05,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 340/12323 [34:32<20:17:05,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 275 max words, 50 samples - at ../dataset/gen-word-275-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 341/12323 [34:33<20:14:10,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 341/12323 [34:33<20:14:10,  6.08s/it, v_num=i2o7, train/loss=5."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 870 max words - at ../dataset/shuffle-word-870-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 342/12323 [34:39<20:14:13,  6.08s/it, v_num=i2o7, train/loss=5.\r",
+      "Epoch 0:   3%| | 342/12323 [34:39<20:14:14,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 275 max words - at ../dataset/shuffle-word-275-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 343/12323 [34:45<20:14:14,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 343/12323 [34:45<20:14:14,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 655 max words - at ../dataset/shuffle-word-655-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 344/12323 [34:53<20:14:50,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 344/12323 [34:53<20:14:50,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 750 max words, 50 samples - at ../dataset/gen-word-750-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 345/12323 [34:59<20:14:52,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 345/12323 [34:59<20:14:52,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 665 max words, 50 samples - at ../dataset/gen-word-665-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 346/12323 [35:02<20:13:09,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 346/12323 [35:02<20:13:09,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 840 max words, 50 samples - at ../dataset/gen-word-840-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 347/12323 [35:07<20:12:21,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 347/12323 [35:07<20:12:21,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 450 max words - at ../dataset/shuffle-word-450-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 348/12323 [35:16<20:13:35,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 348/12323 [35:16<20:13:35,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 865 max words, 50 samples - at ../dataset/gen-word-865-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 349/12323 [35:19<20:12:11,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 349/12323 [35:19<20:12:11,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 815 max words, 50 samples - at ../dataset/gen-word-815-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 350/12323 [35:21<20:09:21,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 350/12323 [35:21<20:09:21,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3000 max words - at ../dataset/shuffle-word-3000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 351/12323 [35:28<20:09:59,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 351/12323 [35:28<20:09:59,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5400 max words - at ../dataset/shuffle-word-5400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 352/12323 [35:55<20:21:34,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 352/12323 [35:55<20:21:34,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 515 max words - at ../dataset/shuffle-word-515-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 353/12323 [36:00<20:21:00,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 353/12323 [36:00<20:21:00,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3400 max words - at ../dataset/shuffle-word-3400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 354/12323 [36:04<20:19:34,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 354/12323 [36:04<20:19:34,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 710 max words, 50 samples - at ../dataset/gen-word-710-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 355/12323 [36:08<20:18:41,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 355/12323 [36:08<20:18:41,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6400 max words - at ../dataset/shuffle-word-6400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 356/12323 [36:15<20:18:41,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 356/12323 [36:15<20:18:41,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 160 max words, 50 samples - at ../dataset/gen-word-160-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 357/12323 [36:22<20:19:15,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 357/12323 [36:22<20:19:15,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 545 max words, 50 samples - at ../dataset/gen-word-545-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 358/12323 [36:28<20:19:16,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 358/12323 [36:28<20:19:16,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 700 max words, 50 samples - at ../dataset/gen-word-700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 359/12323 [36:35<20:19:15,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 359/12323 [36:35<20:19:15,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 855 max words - at ../dataset/shuffle-word-855-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 360/12323 [36:38<20:17:35,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 360/12323 [36:38<20:17:35,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5300 max words - at ../dataset/shuffle-word-5300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 361/12323 [36:46<20:18:43,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 361/12323 [36:46<20:18:43,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 190 max words, 50 samples - at ../dataset/gen-word-190-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 362/12323 [36:55<20:19:54,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 362/12323 [36:55<20:19:54,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 975 max words, 50 samples - at ../dataset/gen-word-975-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 363/12323 [36:56<20:17:19,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 363/12323 [36:56<20:17:19,  6.11s/it, v_num=i2o7, train/loss=0."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4300 max words - at ../dataset/shuffle-word-4300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 364/12323 [36:58<20:14:33,  6.09s/it, v_num=i2o7, train/loss=0.\r",
+      "Epoch 0:   3%| | 364/12323 [36:58<20:14:33,  6.09s/it, v_num=i2o7, train/loss=0."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7200 max words - at ../dataset/shuffle-word-7200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 365/12323 [37:05<20:15:07,  6.10s/it, v_num=i2o7, train/loss=0.\r",
+      "Epoch 0:   3%| | 365/12323 [37:05<20:15:07,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 985 max words, 50 samples - at ../dataset/gen-word-985-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 366/12323 [37:09<20:13:44,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 366/12323 [37:09<20:13:44,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 355 max words, 50 samples - at ../dataset/gen-word-355-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 367/12323 [37:12<20:12:22,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 367/12323 [37:12<20:12:22,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 720 max words, 50 samples - at ../dataset/gen-word-720-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 368/12323 [37:17<20:11:32,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 368/12323 [37:17<20:11:32,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 125 max words, 50 samples - at ../dataset/gen-word-125-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 369/12323 [37:20<20:09:54,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 369/12323 [37:20<20:09:54,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6700 max words - at ../dataset/shuffle-word-6700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 370/12323 [37:25<20:08:51,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 370/12323 [37:25<20:08:51,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 420 max words, 50 samples - at ../dataset/gen-word-420-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 371/12323 [37:28<20:07:14,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 371/12323 [37:28<20:07:14,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 395 max words, 50 samples - at ../dataset/gen-word-395-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 372/12323 [37:33<20:06:25,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 372/12323 [37:33<20:06:25,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4900 max words - at ../dataset/shuffle-word-4900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 373/12323 [37:41<20:07:28,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 373/12323 [37:41<20:07:28,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 435 max words, 50 samples - at ../dataset/gen-word-435-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 374/12323 [37:45<20:06:07,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 374/12323 [37:45<20:06:07,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7900 max words - at ../dataset/shuffle-word-7900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 375/12323 [37:52<20:06:37,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 375/12323 [37:52<20:06:37,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 785 max words, 50 samples - at ../dataset/gen-word-785-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 376/12323 [37:58<20:06:36,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 376/12323 [37:58<20:06:36,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 850 max words - at ../dataset/shuffle-word-850-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 377/12323 [38:05<20:07:05,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 377/12323 [38:05<20:07:05,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 8000 max words - at ../dataset/shuffle-word-8000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 378/12323 [38:07<20:04:57,  6.05s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 378/12323 [38:07<20:04:57,  6.05s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7800 max words - at ../dataset/shuffle-word-7800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 379/12323 [38:15<20:05:27,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 379/12323 [38:15<20:05:27,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5200 max words - at ../dataset/shuffle-word-5200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 380/12323 [38:21<20:05:27,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 380/12323 [38:21<20:05:27,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 965 max words, 50 samples - at ../dataset/gen-word-965-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 381/12323 [38:24<20:04:07,  6.05s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 381/12323 [38:24<20:04:07,  6.05s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 775 max words, 50 samples - at ../dataset/gen-word-775-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 382/12323 [38:32<20:04:38,  6.05s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 382/12323 [38:32<20:04:38,  6.05s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 525 max words - at ../dataset/shuffle-word-525-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 383/12323 [38:35<20:03:03,  6.05s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 383/12323 [38:35<20:03:03,  6.05s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 780 max words - at ../dataset/shuffle-word-780-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 384/12323 [39:12<20:19:06,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 384/12323 [39:12<20:19:06,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5900 max words - at ../dataset/shuffle-word-5900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 385/12323 [39:18<20:19:06,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 385/12323 [39:18<20:19:06,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5700 max words - at ../dataset/shuffle-word-5700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 386/12323 [39:27<20:20:10,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 386/12323 [39:27<20:20:10,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 495 max words, 50 samples - at ../dataset/gen-word-495-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 387/12323 [39:33<20:20:09,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 387/12323 [39:33<20:20:09,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 930 max words - at ../dataset/shuffle-word-930-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 388/12323 [39:41<20:21:08,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 388/12323 [39:41<20:21:09,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 420 max words - at ../dataset/shuffle-word-420-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 389/12323 [39:47<20:20:35,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 389/12323 [39:47<20:20:36,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 250 max words, 50 samples - at ../dataset/gen-word-250-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 390/12323 [39:53<20:20:36,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 390/12323 [39:53<20:20:36,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4200 max words - at ../dataset/shuffle-word-4200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 391/12323 [39:58<20:20:04,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 391/12323 [39:58<20:20:04,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 24 samples (20 token repeat) - 2700 max words - at ../dataset/shuffle-word-2700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 392/12323 [40:06<20:20:32,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 392/12323 [40:06<20:20:32,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4600 max words - at ../dataset/shuffle-word-4600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 393/12323 [40:11<20:20:01,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 393/12323 [40:11<20:20:01,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6800 max words - at ../dataset/shuffle-word-6800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 394/12323 [40:16<20:19:28,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 394/12323 [40:16<20:19:28,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6600 max words - at ../dataset/shuffle-word-6600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 395/12323 [40:21<20:18:28,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 395/12323 [40:21<20:18:28,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3300 max words - at ../dataset/shuffle-word-3300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 396/12323 [40:27<20:18:34,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 396/12323 [40:27<20:18:34,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 32 samples (20 token repeat) - 2600 max words - at ../dataset/shuffle-word-2600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 397/12323 [40:34<20:19:06,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 397/12323 [40:34<20:19:06,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7300 max words - at ../dataset/shuffle-word-7300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 398/12323 [40:42<20:19:39,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 398/12323 [40:42<20:19:39,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1900 max words - at ../dataset/shuffle-word-1900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 399/12323 [40:48<20:19:39,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 399/12323 [40:48<20:19:40,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1400 max words - at ../dataset/shuffle-word-1400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 400/12323 [40:57<20:20:39,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 400/12323 [40:57<20:20:39,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 60 samples (20 token repeat) - 1100 max words - at ../dataset/shuffle-word-1100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 401/12323 [41:03<20:20:40,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 401/12323 [41:03<20:20:40,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6300 max words - at ../dataset/shuffle-word-6300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 402/12323 [41:10<20:21:10,  6.15s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 402/12323 [41:10<20:21:10,  6.15s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6100 max words - at ../dataset/shuffle-word-6100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 403/12323 [41:18<20:21:39,  6.15s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 403/12323 [41:18<20:21:39,  6.15s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 205 max words, 50 samples - at ../dataset/gen-word-205-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 404/12323 [41:20<20:19:39,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 404/12323 [41:20<20:19:39,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 400 max words, 50 samples - at ../dataset/gen-word-400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 405/12323 [41:27<20:20:07,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 405/12323 [41:27<20:20:07,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 410 max words, 50 samples - at ../dataset/gen-word-410-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 406/12323 [41:36<20:21:08,  6.15s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 406/12323 [41:36<20:21:08,  6.15s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7700 max words - at ../dataset/shuffle-word-7700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 407/12323 [41:38<20:19:08,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 407/12323 [41:38<20:19:08,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 730 max words, 50 samples - at ../dataset/gen-word-730-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 408/12323 [41:43<20:18:39,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 408/12323 [41:43<20:18:39,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 350 max words, 50 samples - at ../dataset/gen-word-350-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 409/12323 [41:45<20:16:26,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 409/12323 [41:45<20:16:26,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6500 max words - at ../dataset/shuffle-word-6500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 410/12323 [41:48<20:14:43,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 410/12323 [41:48<20:14:43,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 2800 max words - at ../dataset/shuffle-word-2800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 411/12323 [41:53<20:14:15,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 411/12323 [41:53<20:14:15,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 635 max words, 50 samples - at ../dataset/gen-word-635-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 412/12323 [41:56<20:12:33,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 412/12323 [41:56<20:12:33,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 780 max words, 50 samples - at ../dataset/gen-word-780-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 413/12323 [41:59<20:11:05,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 855 max words, 50 samples - at ../dataset/gen-word-855-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 413/12323 [41:59<20:11:05,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 995 max words, 50 samples - at ../dataset/gen-word-995-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 414/12323 [42:06<20:11:05,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 414/12323 [42:06<20:11:05,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4800 max words - at ../dataset/shuffle-word-4800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 415/12323 [42:14<20:12:02,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 415/12323 [42:14<20:12:02,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 2300 max words - at ../dataset/shuffle-word-2300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 416/12323 [42:31<20:17:00,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 416/12323 [42:31<20:17:00,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1100 max words, 2000 samples - at ../dataset/gen-word-1100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 417/12323 [42:39<20:17:58,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 417/12323 [42:39<20:17:59,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1200 max words, 2000 samples - at ../dataset/gen-word-1200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 418/12323 [42:41<20:16:03,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 418/12323 [42:41<20:16:03,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1300 max words, 2000 samples - at ../dataset/gen-word-1300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 419/12323 [42:46<20:15:19,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 419/12323 [42:46<20:15:19,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1500 max words, 2000 samples - at ../dataset/gen-word-1500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 420/12323 [42:49<20:13:38,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 420/12323 [42:49<20:13:38,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1600 max words, 2000 samples - at ../dataset/gen-word-1600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 421/12323 [42:56<20:14:08,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 421/12323 [42:56<20:14:08,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1400 max words, 2000 samples - at ../dataset/gen-word-1400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 422/12323 [43:05<20:15:03,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 422/12323 [43:05<20:15:03,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1700 max words, 2000 samples - at ../dataset/gen-word-1700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 423/12323 [43:11<20:15:05,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 423/12323 [43:11<20:15:05,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2000 max words, 2000 samples - at ../dataset/gen-word-2000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 424/12323 [43:18<20:15:31,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 424/12323 [43:18<20:15:31,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1800 max words, 2000 samples - at ../dataset/gen-word-1800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 425/12323 [43:23<20:14:34,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 425/12323 [43:23<20:14:34,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1900 max words, 2000 samples - at ../dataset/gen-word-1900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 426/12323 [43:26<20:13:09,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 426/12323 [43:26<20:13:09,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2300 max words, 2000 samples - at ../dataset/gen-word-2300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 427/12323 [43:31<20:12:25,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 427/12323 [43:31<20:12:25,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2100 max words, 2000 samples - at ../dataset/gen-word-2100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 428/12323 [43:36<20:11:56,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 428/12323 [43:36<20:11:56,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2200 max words, 2000 samples - at ../dataset/gen-word-2200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 429/12323 [43:39<20:10:18,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 429/12323 [43:39<20:10:18,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2400 max words, 2000 samples - at ../dataset/gen-word-2400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 430/12323 [43:43<20:09:21,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 430/12323 [43:43<20:09:21,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2500 max words, 2000 samples - at ../dataset/gen-word-2500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   3%| | 431/12323 [43:49<20:09:21,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 431/12323 [43:49<20:09:21,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2700 max words, 2000 samples - at ../dataset/gen-word-2700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 432/12323 [43:55<20:08:51,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 432/12323 [43:55<20:08:51,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2600 max words, 2000 samples - at ../dataset/gen-word-2600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 433/12323 [44:01<20:08:49,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 433/12323 [44:01<20:08:49,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2800 max words, 2000 samples - at ../dataset/gen-word-2800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 434/12323 [44:08<20:09:19,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 434/12323 [44:08<20:09:19,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2900 max words, 2000 samples - at ../dataset/gen-word-2900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 435/12323 [44:16<20:09:46,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 435/12323 [44:16<20:09:46,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3200 max words, 2000 samples - at ../dataset/gen-word-3200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 436/12323 [44:18<20:08:10,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 436/12323 [44:18<20:08:10,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3100 max words, 2000 samples - at ../dataset/gen-word-3100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 437/12323 [44:20<20:05:52,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 437/12323 [44:20<20:05:52,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3400 max words, 2000 samples - at ../dataset/gen-word-3400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 438/12323 [44:23<20:04:44,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 438/12323 [44:23<20:04:44,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3300 max words, 2000 samples - at ../dataset/gen-word-3300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 439/12323 [44:26<20:02:55,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 439/12323 [44:26<20:02:55,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3600 max words, 2000 samples - at ../dataset/gen-word-3600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 440/12323 [44:32<20:02:54,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 440/12323 [44:32<20:02:54,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3000 max words, 2000 samples - at ../dataset/gen-word-3000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 441/12323 [44:39<20:03:21,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 441/12323 [44:39<20:03:21,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3800 max words, 2000 samples - at ../dataset/gen-word-3800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 442/12323 [44:40<20:01:05,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 442/12323 [44:40<20:01:05,  6.07s/it, v_num=i2o7, train/loss=0."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4000 max words, 2000 samples - at ../dataset/gen-word-4000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 443/12323 [44:49<20:01:58,  6.07s/it, v_num=i2o7, train/loss=0.\r",
+      "Epoch 0:   4%| | 443/12323 [44:49<20:01:58,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3500 max words, 2000 samples - at ../dataset/gen-word-3500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 444/12323 [44:56<20:02:25,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 444/12323 [44:56<20:02:25,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4100 max words, 2000 samples - at ../dataset/gen-word-4100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 445/12323 [45:01<20:01:43,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 445/12323 [45:01<20:01:43,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4600 max words, 2000 samples - at ../dataset/gen-word-4600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 446/12323 [45:06<20:01:16,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 446/12323 [45:06<20:01:16,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4700 max words, 2000 samples - at ../dataset/gen-word-4700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 447/12323 [45:11<20:00:36,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 447/12323 [45:11<20:00:36,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3900 max words, 2000 samples - at ../dataset/gen-word-3900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 448/12323 [45:52<20:16:08,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 448/12323 [45:52<20:16:08,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4400 max words, 2000 samples - at ../dataset/gen-word-4400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 449/12323 [45:59<20:16:08,  6.15s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 449/12323 [45:59<20:16:08,  6.15s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3700 max words, 2000 samples - at ../dataset/gen-word-3700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 450/12323 [46:00<20:14:07,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 450/12323 [46:00<20:14:07,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5600 max words, 2000 samples - at ../dataset/gen-word-5600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 451/12323 [46:04<20:12:46,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4900 max words, 2000 samples - at ../dataset/gen-word-4900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 451/12323 [46:04<20:12:46,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4200 max words, 2000 samples - at ../dataset/gen-word-4200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 452/12323 [46:10<20:12:46,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 452/12323 [46:10<20:12:46,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5000 max words, 2000 samples - at ../dataset/gen-word-5000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 453/12323 [46:15<20:12:18,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 453/12323 [46:15<20:12:18,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4500 max words, 2000 samples - at ../dataset/gen-word-4500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 454/12323 [46:19<20:10:58,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 454/12323 [46:19<20:10:58,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4800 max words, 2000 samples - at ../dataset/gen-word-4800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 455/12323 [46:23<20:09:52,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 455/12323 [46:23<20:09:52,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4300 max words, 2000 samples - at ../dataset/gen-word-4300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 456/12323 [46:26<20:08:32,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 456/12323 [46:26<20:08:32,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5800 max words, 2000 samples - at ../dataset/gen-word-5800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 457/12323 [46:27<20:06:21,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 457/12323 [46:27<20:06:21,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5100 max words, 2000 samples - at ../dataset/gen-word-5100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 458/12323 [46:32<20:05:54,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 458/12323 [46:32<20:05:54,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5700 max words, 2000 samples - at ../dataset/gen-word-5700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 459/12323 [46:41<20:06:44,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 459/12323 [46:41<20:06:44,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6300 max words, 2000 samples - at ../dataset/gen-word-6300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 460/12323 [46:46<20:06:17,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 460/12323 [46:46<20:06:17,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5300 max words, 2000 samples - at ../dataset/gen-word-5300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 461/12323 [46:49<20:04:46,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 461/12323 [46:49<20:04:46,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5200 max words, 2000 samples - at ../dataset/gen-word-5200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 462/12323 [46:53<20:03:54,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 462/12323 [46:53<20:03:54,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5500 max words, 2000 samples - at ../dataset/gen-word-5500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 463/12323 [46:58<20:03:27,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 463/12323 [46:58<20:03:27,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6200 max words, 2000 samples - at ../dataset/gen-word-6200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 464/12323 [47:04<20:03:02,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 464/12323 [47:04<20:03:02,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6100 max words, 2000 samples - at ../dataset/gen-word-6100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 465/12323 [47:10<20:03:06,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 465/12323 [47:10<20:03:06,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6000 max words, 2000 samples - at ../dataset/gen-word-6000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 466/12323 [47:17<20:03:06,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 466/12323 [47:17<20:03:06,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5900 max words, 2000 samples - at ../dataset/gen-word-5900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 467/12323 [47:22<20:02:40,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 467/12323 [47:22<20:02:40,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5400 max words, 2000 samples - at ../dataset/gen-word-5400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 468/12323 [47:25<20:01:23,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 468/12323 [47:25<20:01:23,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6400 max words, 2000 samples - at ../dataset/gen-word-6400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 469/12323 [47:27<19:59:41,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 469/12323 [47:27<19:59:41,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7500 max words, 2000 samples - at ../dataset/gen-word-7500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 470/12323 [47:34<19:59:43,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 470/12323 [47:34<19:59:43,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7400 max words, 2000 samples - at ../dataset/gen-word-7400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 471/12323 [47:35<19:57:36,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 471/12323 [47:35<19:57:36,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7000 max words, 2000 samples - at ../dataset/gen-word-7000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 472/12323 [47:41<19:57:37,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 472/12323 [47:41<19:57:37,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6600 max words, 2000 samples - at ../dataset/gen-word-6600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 473/12323 [47:47<19:57:14,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 473/12323 [47:47<19:57:14,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6500 max words, 2000 samples - at ../dataset/gen-word-6500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 474/12323 [47:52<19:56:36,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 474/12323 [47:52<19:56:36,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6900 max words, 2000 samples - at ../dataset/gen-word-6900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 475/12323 [47:56<19:55:46,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 475/12323 [47:56<19:55:46,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6800 max words, 2000 samples - at ../dataset/gen-word-6800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 476/12323 [47:57<19:53:41,  6.05s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 476/12323 [47:57<19:53:41,  6.05s/it, v_num=i2o7, train/loss=0."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7200 max words, 2000 samples - at ../dataset/gen-word-7200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 477/12323 [48:04<19:53:44,  6.05s/it, v_num=i2o7, train/loss=0.\r",
+      "Epoch 0:   4%| | 477/12323 [48:04<19:53:44,  6.05s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7700 max words, 2000 samples - at ../dataset/gen-word-7700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 478/12323 [48:09<19:53:19,  6.04s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 478/12323 [48:09<19:53:19,  6.04s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7100 max words, 2000 samples - at ../dataset/gen-word-7100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 479/12323 [48:13<19:52:30,  6.04s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 479/12323 [48:13<19:52:30,  6.04s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6700 max words, 2000 samples - at ../dataset/gen-word-6700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 480/12323 [49:16<20:15:33,  6.16s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 480/12323 [49:16<20:15:33,  6.16s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7300 max words, 2000 samples - at ../dataset/gen-word-7300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 481/12323 [49:21<20:15:08,  6.16s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 481/12323 [49:21<20:15:08,  6.16s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7600 max words, 2000 samples - at ../dataset/gen-word-7600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 482/12323 [49:23<20:13:14,  6.15s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 482/12323 [49:23<20:13:14,  6.15s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 8000 max words, 2000 samples - at ../dataset/gen-word-8000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 483/12323 [49:27<20:12:11,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 483/12323 [49:27<20:12:11,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7800 max words, 2000 samples - at ../dataset/gen-word-7800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 484/12323 [49:30<20:10:55,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 484/12323 [49:30<20:10:55,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7900 max words, 2000 samples - at ../dataset/gen-word-7900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 485/12323 [49:35<20:10:14,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 485/12323 [49:35<20:10:14,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Done ##\n"
+      "\r",
+      "Epoch 0:   4%| | 486/12323 [49:38<20:08:58,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 486/12323 [49:38<20:08:58,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "total 6.1G\n"
+      "\r",
+      "Epoch 0:   4%| | 487/12323 [49:41<20:07:43,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 487/12323 [49:41<20:07:43,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 9.5K Sep  1 14:53 gen-word-10-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 488/12323 [49:50<20:08:34,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 488/12323 [49:50<20:08:34,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  52K Sep  1 14:53 gen-word-100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 489/12323 [49:52<20:07:08,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 489/12323 [49:52<20:07:08,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 489K Sep  1 14:53 gen-word-1000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 490/12323 [49:57<20:06:33,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 490/12323 [49:57<20:06:33,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  56K Sep  1 14:53 gen-word-105-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 491/12323 [50:03<20:06:07,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 491/12323 [50:03<20:06:07,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  59K Sep  1 14:53 gen-word-110-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 492/12323 [50:06<20:04:54,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 492/12323 [50:06<20:04:54,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  22M Sep  1 14:53 gen-word-1100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 493/12323 [50:13<20:05:20,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 493/12323 [50:13<20:05:20,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  61K Sep  1 14:53 gen-word-115-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 494/12323 [50:19<20:04:57,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 494/12323 [50:19<20:04:57,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  63K Sep  1 14:53 gen-word-120-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 495/12323 [50:26<20:05:19,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 495/12323 [50:26<20:05:19,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  23M Sep  1 14:53 gen-word-1200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 496/12323 [50:31<20:04:54,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 496/12323 [50:31<20:04:54,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  69K Sep  1 14:53 gen-word-125-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 497/12323 [50:40<20:05:41,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 497/12323 [50:40<20:05:41,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  69K Sep  1 14:53 gen-word-130-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 498/12323 [50:44<20:04:52,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 498/12323 [50:44<20:04:52,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  25M Sep  1 14:53 gen-word-1300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 499/12323 [50:45<20:02:51,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 499/12323 [50:45<20:02:51,  6.10s/it, v_num=i2o7, train/loss=2."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  69K Sep  1 14:53 gen-word-135-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 500/12323 [50:53<20:03:14,  6.11s/it, v_num=i2o7, train/loss=2.\r",
+      "Epoch 0:   4%| | 500/12323 [50:53<20:03:14,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  71K Sep  1 14:53 gen-word-140-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 501/12323 [50:55<20:01:37,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 501/12323 [50:55<20:01:37,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27M Sep  1 14:53 gen-word-1400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 502/12323 [51:03<20:02:22,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 502/12323 [51:03<20:02:22,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  76K Sep  1 14:53 gen-word-145-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 503/12323 [51:04<20:00:23,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 503/12323 [51:04<20:00:23,  6.09s/it, v_num=i2o7, train/loss=0."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  12K Sep  1 14:53 gen-word-15-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 504/12323 [51:11<20:00:22,  6.09s/it, v_num=i2o7, train/loss=0.\r",
+      "Epoch 0:   4%| | 504/12323 [51:11<20:00:22,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  78K Sep  1 14:53 gen-word-150-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 505/12323 [51:18<20:00:43,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 505/12323 [51:18<20:00:43,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  29M Sep  1 14:53 gen-word-1500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 506/12323 [51:26<20:01:30,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 506/12323 [51:26<20:01:30,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  80K Sep  1 14:53 gen-word-155-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 507/12323 [51:31<20:00:41,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 507/12323 [51:31<20:00:41,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  83K Sep  1 14:53 gen-word-160-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 508/12323 [51:33<19:59:05,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 508/12323 [51:33<19:59:05,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  31M Sep  1 14:53 gen-word-1600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 509/12323 [51:39<19:59:05,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 509/12323 [51:39<19:59:05,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  84K Sep  1 14:53 gen-word-165-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 510/12323 [51:48<19:59:51,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 510/12323 [51:48<19:59:51,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  86K Sep  1 14:53 gen-word-170-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 511/12323 [51:55<20:00:13,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 511/12323 [51:55<20:00:13,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  33M Sep  1 14:53 gen-word-1700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 512/12323 [52:26<20:09:36,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 512/12323 [52:26<20:09:36,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  91K Sep  1 14:53 gen-word-175-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 513/12323 [52:33<20:09:58,  6.15s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 513/12323 [52:33<20:09:58,  6.15s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  91K Sep  1 14:53 gen-word-180-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 514/12323 [52:38<20:09:34,  6.15s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 514/12323 [52:38<20:09:34,  6.15s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  35M Sep  1 14:53 gen-word-1800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 515/12323 [52:41<20:08:10,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 515/12323 [52:41<20:08:10,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  94K Sep  1 14:53 gen-word-185-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 516/12323 [52:46<20:07:44,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 516/12323 [52:46<20:07:44,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  97K Sep  1 14:53 gen-word-190-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 517/12323 [52:55<20:08:28,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 517/12323 [52:55<20:08:28,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  37M Sep  1 14:53 gen-word-1900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 518/12323 [53:03<20:09:09,  6.15s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 518/12323 [53:03<20:09:09,  6.15s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 100K Sep  1 14:53 gen-word-195-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 519/12323 [53:06<20:07:58,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 519/12323 [53:06<20:07:58,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  15K Sep  1 14:53 gen-word-20-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 520/12323 [53:10<20:06:57,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 520/12323 [53:10<20:06:57,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 103K Sep  1 14:53 gen-word-200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 521/12323 [53:16<20:06:52,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 521/12323 [53:16<20:06:52,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  39M Sep  1 14:53 gen-word-2000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 522/12323 [53:24<20:07:33,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 522/12323 [53:24<20:07:33,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 105K Sep  1 14:53 gen-word-205-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 523/12323 [53:31<20:07:30,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 523/12323 [53:31<20:07:30,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 108K Sep  1 14:53 gen-word-210-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 524/12323 [53:34<20:06:20,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 524/12323 [53:34<20:06:20,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  41M Sep  1 14:53 gen-word-2100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 525/12323 [53:42<20:07:04,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 525/12323 [53:42<20:07:04,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 108K Sep  1 14:53 gen-word-215-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 526/12323 [53:50<20:07:25,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 526/12323 [53:50<20:07:25,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 114K Sep  1 14:53 gen-word-220-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 527/12323 [53:53<20:06:05,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 527/12323 [53:53<20:06:05,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  43M Sep  1 14:53 gen-word-2200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 528/12323 [53:54<20:04:10,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 528/12323 [53:54<20:04:10,  6.13s/it, v_num=i2o7, train/loss=2."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 115K Sep  1 14:53 gen-word-225-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 529/12323 [53:58<20:03:12,  6.12s/it, v_num=i2o7, train/loss=2.\r",
+      "Epoch 0:   4%| | 529/12323 [53:58<20:03:12,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 118K Sep  1 14:53 gen-word-230-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 530/12323 [54:05<20:03:34,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 530/12323 [54:05<20:03:35,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  44M Sep  1 14:53 gen-word-2300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 531/12323 [54:11<20:03:35,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 531/12323 [54:11<20:03:35,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 120K Sep  1 14:53 gen-word-235-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 532/12323 [54:15<20:02:26,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 532/12323 [54:15<20:02:26,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 122K Sep  1 14:53 gen-word-240-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 533/12323 [54:19<20:01:29,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 533/12323 [54:19<20:01:29,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  46M Sep  1 14:53 gen-word-2400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 534/12323 [54:23<20:00:54,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 534/12323 [54:23<20:00:54,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 125K Sep  1 14:53 gen-word-245-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 535/12323 [54:25<19:59:02,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 535/12323 [54:25<19:59:02,  6.10s/it, v_num=i2o7, train/loss=3."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  18K Sep  1 14:53 gen-word-25-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 536/12323 [54:28<19:57:55,  6.10s/it, v_num=i2o7, train/loss=3.\r",
+      "Epoch 0:   4%| | 536/12323 [54:28<19:57:55,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 127K Sep  1 14:53 gen-word-250-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 537/12323 [54:29<19:56:03,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 537/12323 [54:29<19:56:03,  6.09s/it, v_num=i2o7, train/loss=0."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  48M Sep  1 14:53 gen-word-2500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 538/12323 [54:37<19:56:25,  6.09s/it, v_num=i2o7, train/loss=0.\r",
+      "Epoch 0:   4%| | 538/12323 [54:37<19:56:25,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 131K Sep  1 14:53 gen-word-255-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 539/12323 [54:40<19:55:30,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 539/12323 [54:40<19:55:30,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 129K Sep  1 14:53 gen-word-260-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 540/12323 [54:43<19:54:12,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 540/12323 [54:43<19:54:12,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  50M Sep  1 14:53 gen-word-2600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 541/12323 [54:46<19:52:55,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 541/12323 [54:46<19:52:55,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 132K Sep  1 14:53 gen-word-265-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 542/12323 [54:49<19:51:48,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 542/12323 [54:49<19:51:48,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 136K Sep  1 14:53 gen-word-270-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 543/12323 [54:57<19:52:10,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 543/12323 [54:57<19:52:11,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  52M Sep  1 14:53 gen-word-2700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 544/12323 [55:44<20:07:02,  6.15s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 544/12323 [55:44<20:07:02,  6.15s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 137K Sep  1 14:53 gen-word-275-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 545/12323 [55:46<20:05:23,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 545/12323 [55:46<20:05:23,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 143K Sep  1 14:53 gen-word-280-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 546/12323 [55:55<20:06:08,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 546/12323 [55:55<20:06:08,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  54M Sep  1 14:53 gen-word-2800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 547/12323 [55:58<20:05:01,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 547/12323 [55:58<20:05:01,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 142K Sep  1 14:53 gen-word-285-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 548/12323 [56:04<20:05:01,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 548/12323 [56:04<20:05:01,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 145K Sep  1 14:53 gen-word-290-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 549/12323 [56:08<20:03:54,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 549/12323 [56:08<20:03:54,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  56M Sep  1 14:53 gen-word-2900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 550/12323 [56:14<20:03:54,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 550/12323 [56:14<20:03:54,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 148K Sep  1 14:53 gen-word-295-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 551/12323 [56:19<20:03:20,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 551/12323 [56:19<20:03:20,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  20K Sep  1 14:53 gen-word-30-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 552/12323 [56:22<20:02:14,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 552/12323 [56:22<20:02:14,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 147K Sep  1 14:53 gen-word-300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 553/12323 [56:27<20:01:41,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 553/12323 [56:27<20:01:41,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  58M Sep  1 14:53 gen-word-3000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   4%| | 554/12323 [56:35<20:02:04,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 554/12323 [56:35<20:02:04,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 153K Sep  1 14:53 gen-word-305-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 555/12323 [56:39<20:01:31,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 156K Sep  1 14:53 gen-word-310-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 555/12323 [56:39<20:01:31,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  60M Sep  1 14:53 gen-word-3100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 556/12323 [56:44<20:00:47,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 556/12323 [56:44<20:00:47,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 161K Sep  1 14:53 gen-word-315-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 557/12323 [56:51<20:01:09,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 557/12323 [56:51<20:01:09,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 158K Sep  1 14:53 gen-word-320-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 558/12323 [57:00<20:01:52,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 558/12323 [57:00<20:01:52,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  61M Sep  1 14:53 gen-word-3200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 559/12323 [57:06<20:01:51,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 559/12323 [57:06<20:01:51,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 160K Sep  1 14:53 gen-word-325-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 560/12323 [57:10<20:01:08,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 560/12323 [57:10<20:01:08,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 164K Sep  1 14:53 gen-word-330-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 561/12323 [57:15<20:00:39,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 561/12323 [57:15<20:00:39,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  63M Sep  1 14:53 gen-word-3300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 562/12323 [57:20<19:59:57,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 562/12323 [57:20<19:59:57,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 172K Sep  1 14:53 gen-word-335-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 563/12323 [57:28<20:00:41,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 563/12323 [57:28<20:00:41,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 176K Sep  1 14:53 gen-word-340-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 564/12323 [57:37<20:01:25,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 564/12323 [57:37<20:01:25,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  65M Sep  1 14:53 gen-word-3400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 565/12323 [57:41<20:00:32,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 565/12323 [57:41<20:00:32,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 176K Sep  1 14:53 gen-word-345-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 566/12323 [57:43<19:58:56,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 566/12323 [57:43<19:58:56,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  22K Sep  1 14:53 gen-word-35-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 567/12323 [57:45<19:57:30,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 567/12323 [57:45<19:57:30,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 176K Sep  1 14:53 gen-word-350-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 568/12323 [57:48<19:56:14,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 568/12323 [57:48<19:56:14,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  67M Sep  1 14:53 gen-word-3500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 569/12323 [57:50<19:54:58,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 569/12323 [57:50<19:54:58,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 178K Sep  1 14:53 gen-word-355-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 570/12323 [57:53<19:53:33,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 570/12323 [57:53<19:53:33,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 179K Sep  1 14:53 gen-word-360-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 571/12323 [57:55<19:52:18,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 571/12323 [57:55<19:52:18,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  69M Sep  1 14:53 gen-word-3600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 572/12323 [57:59<19:51:24,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 572/12323 [57:59<19:51:24,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 186K Sep  1 14:53 gen-word-365-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 573/12323 [58:04<19:50:52,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 573/12323 [58:04<19:50:52,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 179K Sep  1 14:53 gen-word-370-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 574/12323 [58:12<19:51:31,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 574/12323 [58:12<19:51:31,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  71M Sep  1 14:53 gen-word-3700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 575/12323 [58:17<19:51:07,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 575/12323 [58:17<19:51:07,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 186K Sep  1 14:53 gen-word-375-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 576/12323 [58:47<19:59:07,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 576/12323 [58:47<19:59:07,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 190K Sep  1 14:53 gen-word-380-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 577/12323 [58:51<19:58:14,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 577/12323 [58:51<19:58:14,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  73M Sep  1 14:53 gen-word-3800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 578/12323 [58:55<19:57:20,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 578/12323 [58:55<19:57:20,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 192K Sep  1 14:53 gen-word-385-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 579/12323 [59:00<19:56:47,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 579/12323 [59:00<19:56:47,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 196K Sep  1 14:53 gen-word-390-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 580/12323 [59:07<19:57:06,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 580/12323 [59:07<19:57:06,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  75M Sep  1 14:53 gen-word-3900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 581/12323 [59:15<19:57:44,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 581/12323 [59:15<19:57:44,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 194K Sep  1 14:53 gen-word-395-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 582/12323 [59:19<19:56:40,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 582/12323 [59:19<19:56:40,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  24K Sep  1 14:53 gen-word-40-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 583/12323 [59:23<19:56:06,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 583/12323 [59:23<19:56:06,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 196K Sep  1 14:53 gen-word-400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 584/12323 [59:32<19:56:42,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 584/12323 [59:32<19:56:42,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  77M Sep  1 14:53 gen-word-4000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 585/12323 [59:33<19:54:58,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 585/12323 [59:33<19:54:58,  6.11s/it, v_num=i2o7, train/loss=0."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 201K Sep  1 14:53 gen-word-405-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 586/12323 [59:41<19:55:34,  6.11s/it, v_num=i2o7, train/loss=0.\r",
+      "Epoch 0:   5%| | 586/12323 [59:41<19:55:34,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 204K Sep  1 14:53 gen-word-410-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 587/12323 [59:45<19:54:40,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 587/12323 [59:45<19:54:40,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  79M Sep  1 14:53 gen-word-4100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 588/12323 [59:53<19:55:18,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 588/12323 [59:53<19:55:18,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 210K Sep  1 14:53 gen-word-415-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 589/12323 [59:56<19:54:14,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 589/12323 [59:56<19:54:14,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 209K Sep  1 14:53 gen-word-420-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 590/12323 [1:00:05<19:54:51,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 590/12323 [1:00:05<19:54:51,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  80M Sep  1 14:53 gen-word-4200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 591/12323 [1:00:09<19:54:19,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 591/12323 [1:00:09<19:54:19,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 214K Sep  1 14:53 gen-word-425-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 592/12323 [1:00:13<19:53:26,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 592/12323 [1:00:13<19:53:26,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 215K Sep  1 14:53 gen-word-430-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 593/12323 [1:00:18<19:52:53,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 593/12323 [1:00:18<19:52:53,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  82M Sep  1 14:53 gen-word-4300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 594/12323 [1:00:19<19:51:11,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 594/12323 [1:00:19<19:51:11,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 218K Sep  1 14:53 gen-word-435-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 595/12323 [1:00:21<19:49:40,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 595/12323 [1:00:21<19:49:40,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 218K Sep  1 14:53 gen-word-440-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 596/12323 [1:00:29<19:50:19,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 596/12323 [1:00:29<19:50:19,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  84M Sep  1 14:53 gen-word-4400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 597/12323 [1:00:34<19:49:49,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 597/12323 [1:00:34<19:49:49,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 220K Sep  1 14:53 gen-word-445-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 598/12323 [1:00:35<19:48:09,  6.08s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 598/12323 [1:00:35<19:48:09,  6.08s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 gen-word-45-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 599/12323 [1:00:42<19:48:11,  6.08s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 599/12323 [1:00:42<19:48:11,  6.08s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 225K Sep  1 14:53 gen-word-450-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 600/12323 [1:00:50<19:48:50,  6.08s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 600/12323 [1:00:50<19:48:50,  6.08s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  86M Sep  1 14:53 gen-word-4500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 601/12323 [1:00:56<19:48:27,  6.08s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 601/12323 [1:00:56<19:48:27,  6.08s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 226K Sep  1 14:53 gen-word-455-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 602/12323 [1:00:57<19:46:47,  6.08s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 602/12323 [1:00:57<19:46:47,  6.08s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 225K Sep  1 14:53 gen-word-460-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 603/12323 [1:01:01<19:46:05,  6.07s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 603/12323 [1:01:01<19:46:05,  6.07s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  88M Sep  1 14:53 gen-word-4600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 604/12323 [1:01:06<19:45:32,  6.07s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 604/12323 [1:01:06<19:45:32,  6.07s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 227K Sep  1 14:53 gen-word-465-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 605/12323 [1:01:10<19:45:00,  6.07s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 605/12323 [1:01:10<19:45:00,  6.07s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 234K Sep  1 14:53 gen-word-470-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 606/12323 [1:01:14<19:44:09,  6.06s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 606/12323 [1:01:14<19:44:09,  6.06s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  90M Sep  1 14:53 gen-word-4700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 607/12323 [1:01:19<19:43:39,  6.06s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 607/12323 [1:01:19<19:43:39,  6.06s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 233K Sep  1 14:53 gen-word-475-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 608/12323 [1:02:13<19:58:58,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 608/12323 [1:02:13<19:58:58,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 232K Sep  1 14:53 gen-word-480-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 609/12323 [1:02:21<19:59:17,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 609/12323 [1:02:21<19:59:17,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  92M Sep  1 14:53 gen-word-4800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 610/12323 [1:02:28<19:59:38,  6.15s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 610/12323 [1:02:28<19:59:38,  6.15s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 238K Sep  1 14:53 gen-word-485-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 611/12323 [1:02:33<19:59:08,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 611/12323 [1:02:33<19:59:08,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 244K Sep  1 14:53 gen-word-490-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 612/12323 [1:02:35<19:57:48,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 612/12323 [1:02:35<19:57:48,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  94M Sep  1 14:53 gen-word-4900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 613/12323 [1:02:42<19:57:49,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 613/12323 [1:02:42<19:57:49,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 249K Sep  1 14:53 gen-word-495-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 614/12323 [1:02:48<19:57:49,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 614/12323 [1:02:48<19:57:49,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 7.4K Sep  1 14:53 gen-word-5-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 615/12323 [1:02:53<19:57:19,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 615/12323 [1:02:53<19:57:19,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  31K Sep  1 14:53 gen-word-50-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 616/12323 [1:03:01<19:57:38,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 616/12323 [1:03:01<19:57:38,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 243K Sep  1 14:53 gen-word-500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 617/12323 [1:03:04<19:56:48,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 617/12323 [1:03:04<19:56:48,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  96M Sep  1 14:53 gen-word-5000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 618/12323 [1:03:12<19:57:07,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 618/12323 [1:03:12<19:57:07,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 254K Sep  1 14:53 gen-word-505-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 619/12323 [1:03:16<19:56:17,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 619/12323 [1:03:16<19:56:17,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 253K Sep  1 14:53 gen-word-510-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 620/12323 [1:03:18<19:55:08,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 620/12323 [1:03:18<19:55:08,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  98M Sep  1 14:53 gen-word-5100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 621/12323 [1:03:27<19:55:47,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 621/12323 [1:03:27<19:55:47,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 259K Sep  1 14:53 gen-word-515-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 622/12323 [1:03:29<19:54:28,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 622/12323 [1:03:29<19:54:28,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 260K Sep  1 14:53 gen-word-520-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 623/12323 [1:03:38<19:55:08,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 623/12323 [1:03:38<19:55:08,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  99M Sep  1 14:53 gen-word-5200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 624/12323 [1:03:44<19:55:08,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 624/12323 [1:03:44<19:55:08,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 254K Sep  1 14:53 gen-word-525-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 625/12323 [1:03:46<19:53:31,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 625/12323 [1:03:46<19:53:31,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 264K Sep  1 14:53 gen-word-530-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 626/12323 [1:03:50<19:53:02,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 626/12323 [1:03:50<19:53:02,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 101M Sep  1 14:53 gen-word-5300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 627/12323 [1:03:55<19:52:33,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 627/12323 [1:03:55<19:52:33,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 262K Sep  1 14:53 gen-word-535-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 628/12323 [1:03:59<19:51:45,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 628/12323 [1:03:59<19:51:45,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 267K Sep  1 14:53 gen-word-540-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 629/12323 [1:04:05<19:51:26,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 629/12323 [1:04:05<19:51:26,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 103M Sep  1 14:53 gen-word-5400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 630/12323 [1:04:08<19:50:28,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 630/12323 [1:04:08<19:50:28,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 272K Sep  1 14:53 gen-word-545-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 631/12323 [1:04:12<19:49:41,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 631/12323 [1:04:12<19:49:41,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  31K Sep  1 14:53 gen-word-55-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 632/12323 [1:04:14<19:48:15,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 632/12323 [1:04:14<19:48:15,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 278K Sep  1 14:53 gen-word-550-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 633/12323 [1:04:19<19:47:47,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 633/12323 [1:04:19<19:47:47,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 105M Sep  1 14:53 gen-word-5500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 634/12323 [1:04:20<19:46:22,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 634/12323 [1:04:20<19:46:22,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 276K Sep  1 14:53 gen-word-555-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 635/12323 [1:04:24<19:45:35,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 635/12323 [1:04:24<19:45:35,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 278K Sep  1 14:53 gen-word-560-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 636/12323 [1:04:28<19:44:48,  6.08s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 636/12323 [1:04:28<19:44:48,  6.08s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 107M Sep  1 14:53 gen-word-5600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 637/12323 [1:04:31<19:43:42,  6.08s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 637/12323 [1:04:31<19:43:42,  6.08s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 285K Sep  1 14:53 gen-word-565-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 638/12323 [1:04:39<19:44:19,  6.08s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 638/12323 [1:04:39<19:44:19,  6.08s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 281K Sep  1 14:53 gen-word-570-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 639/12323 [1:04:41<19:42:45,  6.07s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 639/12323 [1:04:41<19:42:45,  6.07s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 109M Sep  1 14:53 gen-word-5700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 640/12323 [1:05:34<19:57:02,  6.15s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 640/12323 [1:05:34<19:57:02,  6.15s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 289K Sep  1 14:53 gen-word-575-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 641/12323 [1:05:39<19:56:33,  6.15s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 641/12323 [1:05:39<19:56:33,  6.15s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 286K Sep  1 14:53 gen-word-580-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 642/12323 [1:05:46<19:56:51,  6.15s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 642/12323 [1:05:46<19:56:51,  6.15s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 111M Sep  1 14:53 gen-word-5800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 643/12323 [1:05:54<19:57:09,  6.15s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 643/12323 [1:05:54<19:57:09,  6.15s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 291K Sep  1 14:53 gen-word-585-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 644/12323 [1:06:01<19:57:27,  6.15s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 644/12323 [1:06:01<19:57:27,  6.15s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 294K Sep  1 14:53 gen-word-590-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 645/12323 [1:06:06<19:56:58,  6.15s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 645/12323 [1:06:06<19:56:58,  6.15s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 113M Sep  1 14:53 gen-word-5900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 646/12323 [1:06:11<19:56:29,  6.15s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 646/12323 [1:06:11<19:56:29,  6.15s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 295K Sep  1 14:53 gen-word-595-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 647/12323 [1:06:14<19:55:23,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 647/12323 [1:06:14<19:55:23,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  34K Sep  1 14:53 gen-word-60-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 648/12323 [1:06:18<19:54:44,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 648/12323 [1:06:18<19:54:44,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 298K Sep  1 14:53 gen-word-600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 649/12323 [1:06:26<19:54:59,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 649/12323 [1:06:26<19:54:59,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 115M Sep  1 14:53 gen-word-6000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 650/12323 [1:06:31<19:54:38,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 650/12323 [1:06:31<19:54:38,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 302K Sep  1 14:53 gen-word-605-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 651/12323 [1:06:38<19:54:53,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 651/12323 [1:06:38<19:54:53,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 306K Sep  1 14:53 gen-word-610-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 652/12323 [1:06:40<19:53:29,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 652/12323 [1:06:40<19:53:29,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 117M Sep  1 14:53 gen-word-6100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 653/12323 [1:06:47<19:53:44,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 653/12323 [1:06:47<19:53:44,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 302K Sep  1 14:53 gen-word-615-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 654/12323 [1:06:54<19:53:40,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 654/12323 [1:06:54<19:53:40,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 304K Sep  1 14:53 gen-word-620-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 655/12323 [1:07:01<19:53:55,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 655/12323 [1:07:01<19:53:55,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 118M Sep  1 14:53 gen-word-6200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 656/12323 [1:07:04<19:52:49,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 656/12323 [1:07:04<19:52:49,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 313K Sep  1 14:53 gen-word-625-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 657/12323 [1:07:09<19:52:28,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 657/12323 [1:07:09<19:52:28,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 311K Sep  1 14:53 gen-word-630-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 658/12323 [1:07:14<19:52:06,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 658/12323 [1:07:14<19:52:06,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 120M Sep  1 14:53 gen-word-6300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 659/12323 [1:07:15<19:50:34,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 659/12323 [1:07:15<19:50:34,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 317K Sep  1 14:53 gen-word-635-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 660/12323 [1:07:21<19:50:15,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 660/12323 [1:07:21<19:50:15,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 319K Sep  1 14:53 gen-word-640-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 661/12323 [1:07:28<19:50:28,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 661/12323 [1:07:28<19:50:28,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 122M Sep  1 14:53 gen-word-6400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 662/12323 [1:07:34<19:50:26,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 662/12323 [1:07:34<19:50:26,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 320K Sep  1 14:53 gen-word-645-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 663/12323 [1:07:36<19:49:04,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 663/12323 [1:07:36<19:49:04,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  37K Sep  1 14:53 gen-word-65-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 664/12323 [1:07:40<19:48:17,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 664/12323 [1:07:40<19:48:17,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 320K Sep  1 14:53 gen-word-650-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 665/12323 [1:07:48<19:48:50,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 665/12323 [1:07:48<19:48:50,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 124M Sep  1 14:53 gen-word-6500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 666/12323 [1:07:53<19:48:20,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 666/12323 [1:07:53<19:48:20,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 323K Sep  1 14:53 gen-word-655-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 667/12323 [1:07:54<19:46:49,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 667/12323 [1:07:54<19:46:49,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 325K Sep  1 14:53 gen-word-660-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 668/12323 [1:08:00<19:46:29,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 668/12323 [1:08:00<19:46:29,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 126M Sep  1 14:53 gen-word-6600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 669/12323 [1:08:03<19:45:32,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 669/12323 [1:08:03<19:45:32,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 327K Sep  1 14:53 gen-word-665-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 670/12323 [1:08:05<19:44:11,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 670/12323 [1:08:05<19:44:11,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 332K Sep  1 14:53 gen-word-670-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 671/12323 [1:08:06<19:42:49,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 671/12323 [1:08:06<19:42:49,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 128M Sep  1 14:53 gen-word-6700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 672/12323 [1:08:47<19:52:46,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 672/12323 [1:08:47<19:52:46,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 328K Sep  1 14:53 gen-word-675-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 673/12323 [1:08:50<19:51:41,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 673/12323 [1:08:50<19:51:41,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 334K Sep  1 14:53 gen-word-680-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 674/12323 [1:08:54<19:50:55,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 674/12323 [1:08:54<19:50:55,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 130M Sep  1 14:53 gen-word-6800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 675/12323 [1:08:57<19:49:59,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 675/12323 [1:08:57<19:49:59,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 338K Sep  1 14:53 gen-word-685-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 676/12323 [1:09:03<19:49:41,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 676/12323 [1:09:03<19:49:41,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 343K Sep  1 14:53 gen-word-690-count.jsonl\n"
+      "\r",
+      "Epoch 0:   5%| | 677/12323 [1:09:06<19:48:46,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 677/12323 [1:09:06<19:48:46,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 132M Sep  1 14:53 gen-word-6900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 678/12323 [1:09:08<19:47:35,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 678/12323 [1:09:08<19:47:35,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 339K Sep  1 14:53 gen-word-695-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 679/12323 [1:09:16<19:47:51,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 679/12323 [1:09:16<19:47:51,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  38K Sep  1 14:53 gen-word-70-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 680/12323 [1:09:20<19:47:14,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 680/12323 [1:09:20<19:47:14,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 348K Sep  1 14:53 gen-word-700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 681/12323 [1:09:27<19:47:31,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 681/12323 [1:09:27<19:47:31,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 134M Sep  1 14:53 gen-word-7000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 682/12323 [1:09:30<19:46:29,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 682/12323 [1:09:30<19:46:29,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 346K Sep  1 14:53 gen-word-705-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 683/12323 [1:09:38<19:46:45,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 683/12323 [1:09:38<19:46:45,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 353K Sep  1 14:53 gen-word-710-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 684/12323 [1:09:41<19:45:51,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 684/12323 [1:09:41<19:45:51,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 136M Sep  1 14:53 gen-word-7100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 685/12323 [1:09:46<19:45:24,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 685/12323 [1:09:46<19:45:24,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 344K Sep  1 14:53 gen-word-715-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 686/12323 [1:09:53<19:45:41,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 686/12323 [1:09:53<19:45:41,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 353K Sep  1 14:53 gen-word-720-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 687/12323 [1:10:02<19:46:15,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 687/12323 [1:10:02<19:46:15,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 137M Sep  1 14:53 gen-word-7200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 688/12323 [1:10:09<19:46:32,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 688/12323 [1:10:09<19:46:32,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 363K Sep  1 14:53 gen-word-725-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 689/12323 [1:10:11<19:45:13,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 689/12323 [1:10:11<19:45:13,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 364K Sep  1 14:53 gen-word-730-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 690/12323 [1:10:15<19:44:38,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 690/12323 [1:10:15<19:44:38,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 139M Sep  1 14:53 gen-word-7300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 691/12323 [1:10:20<19:44:10,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 691/12323 [1:10:20<19:44:10,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 367K Sep  1 14:53 gen-word-735-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 692/12323 [1:10:29<19:44:44,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 692/12323 [1:10:29<19:44:44,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 368K Sep  1 14:53 gen-word-740-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 693/12323 [1:10:37<19:45:17,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 693/12323 [1:10:37<19:45:17,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 141M Sep  1 14:53 gen-word-7400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 694/12323 [1:10:43<19:45:00,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 694/12323 [1:10:43<19:45:00,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 362K Sep  1 14:53 gen-word-745-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 695/12323 [1:10:49<19:44:58,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 695/12323 [1:10:49<19:44:58,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  43K Sep  1 14:53 gen-word-75-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 696/12323 [1:10:50<19:43:31,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 696/12323 [1:10:50<19:43:31,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 371K Sep  1 14:53 gen-word-750-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 697/12323 [1:10:54<19:42:38,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 697/12323 [1:10:54<19:42:38,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 143M Sep  1 14:53 gen-word-7500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 698/12323 [1:10:59<19:42:19,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 698/12323 [1:10:59<19:42:19,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 371K Sep  1 14:53 gen-word-755-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 699/12323 [1:11:06<19:42:34,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 699/12323 [1:11:06<19:42:34,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 379K Sep  1 14:53 gen-word-760-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 700/12323 [1:11:10<19:41:50,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 700/12323 [1:11:10<19:41:50,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 145M Sep  1 14:53 gen-word-7600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 701/12323 [1:11:17<19:42:04,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 701/12323 [1:11:17<19:42:04,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 380K Sep  1 14:53 gen-word-765-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 702/12323 [1:11:22<19:41:37,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 702/12323 [1:11:22<19:41:37,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 378K Sep  1 14:53 gen-word-770-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 703/12323 [1:11:30<19:41:52,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 703/12323 [1:11:30<19:41:52,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 147M Sep  1 14:53 gen-word-7700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 704/12323 [1:11:57<19:47:38,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 704/12323 [1:11:57<19:47:38,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 379K Sep  1 14:53 gen-word-775-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 705/12323 [1:12:04<19:47:53,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 705/12323 [1:12:04<19:47:53,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 389K Sep  1 14:53 gen-word-780-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 706/12323 [1:12:09<19:47:26,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 706/12323 [1:12:09<19:47:26,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 149M Sep  1 14:53 gen-word-7800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 707/12323 [1:12:16<19:47:26,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 707/12323 [1:12:16<19:47:26,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 389K Sep  1 14:53 gen-word-785-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 708/12323 [1:12:19<19:46:34,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 708/12323 [1:12:19<19:46:34,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 385K Sep  1 14:53 gen-word-790-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 709/12323 [1:12:26<19:46:33,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 709/12323 [1:12:26<19:46:33,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 151M Sep  1 14:53 gen-word-7900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 710/12323 [1:12:27<19:45:16,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 710/12323 [1:12:27<19:45:16,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 386K Sep  1 14:53 gen-word-795-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 711/12323 [1:12:35<19:45:32,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 711/12323 [1:12:35<19:45:32,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  43K Sep  1 14:53 gen-word-80-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 712/12323 [1:12:40<19:45:14,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 712/12323 [1:12:40<19:45:14,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 388K Sep  1 14:53 gen-word-800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 713/12323 [1:12:45<19:44:40,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 713/12323 [1:12:45<19:44:40,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 153M Sep  1 14:53 gen-word-8000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 714/12323 [1:12:47<19:43:24,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 714/12323 [1:12:47<19:43:24,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 389K Sep  1 14:53 gen-word-805-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 715/12323 [1:12:48<19:41:59,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 715/12323 [1:12:48<19:41:59,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 399K Sep  1 14:53 gen-word-810-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 716/12323 [1:12:56<19:42:30,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 716/12323 [1:12:56<19:42:30,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 403K Sep  1 14:53 gen-word-815-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 717/12323 [1:13:02<19:42:12,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 717/12323 [1:13:02<19:42:12,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 404K Sep  1 14:53 gen-word-820-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 718/12323 [1:13:10<19:42:44,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 718/12323 [1:13:10<19:42:44,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 406K Sep  1 14:53 gen-word-825-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 719/12323 [1:13:14<19:42:09,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 719/12323 [1:13:14<19:42:09,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 408K Sep  1 14:53 gen-word-830-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 720/12323 [1:13:16<19:40:54,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 720/12323 [1:13:16<19:40:54,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 410K Sep  1 14:53 gen-word-835-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 721/12323 [1:13:21<19:40:28,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 721/12323 [1:13:21<19:40:28,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 411K Sep  1 14:53 gen-word-840-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 722/12323 [1:13:27<19:40:11,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 722/12323 [1:13:27<19:40:11,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 416K Sep  1 14:53 gen-word-845-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 723/12323 [1:13:29<19:39:12,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 723/12323 [1:13:29<19:39:12,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  46K Sep  1 14:53 gen-word-85-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 724/12323 [1:13:38<19:39:44,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 724/12323 [1:13:38<19:39:44,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 423K Sep  1 14:53 gen-word-850-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 725/12323 [1:13:46<19:40:16,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 725/12323 [1:13:46<19:40:16,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 424K Sep  1 14:53 gen-word-855-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 726/12323 [1:13:48<19:39:02,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 726/12323 [1:13:48<19:39:02,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 427K Sep  1 14:53 gen-word-860-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 727/12323 [1:13:55<19:39:01,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 727/12323 [1:13:55<19:39:02,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 417K Sep  1 14:53 gen-word-865-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 728/12323 [1:13:59<19:38:36,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 728/12323 [1:13:59<19:38:36,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 420K Sep  1 14:53 gen-word-870-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 729/12323 [1:14:02<19:37:37,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 729/12323 [1:14:02<19:37:37,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 431K Sep  1 14:53 gen-word-875-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 730/12323 [1:14:08<19:37:20,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 730/12323 [1:14:08<19:37:20,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 434K Sep  1 14:53 gen-word-880-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 731/12323 [1:14:16<19:37:51,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 731/12323 [1:14:16<19:37:51,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 437K Sep  1 14:53 gen-word-885-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 732/12323 [1:14:20<19:37:09,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 732/12323 [1:14:20<19:37:09,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 434K Sep  1 14:53 gen-word-890-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 733/12323 [1:14:24<19:36:27,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 733/12323 [1:14:24<19:36:27,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 442K Sep  1 14:53 gen-word-895-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 734/12323 [1:14:29<19:36:10,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 734/12323 [1:14:29<19:36:10,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  52K Sep  1 14:53 gen-word-90-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 735/12323 [1:14:38<19:36:41,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 735/12323 [1:14:38<19:36:41,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 440K Sep  1 14:53 gen-word-900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 736/12323 [1:15:06<19:42:32,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 736/12323 [1:15:06<19:42:32,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 445K Sep  1 14:53 gen-word-905-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 737/12323 [1:15:13<19:42:32,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 737/12323 [1:15:13<19:42:32,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 450K Sep  1 14:53 gen-word-910-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 738/12323 [1:15:17<19:41:50,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 738/12323 [1:15:17<19:41:50,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 452K Sep  1 14:53 gen-word-915-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 739/12323 [1:15:25<19:42:21,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 739/12323 [1:15:25<19:42:21,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 453K Sep  1 14:53 gen-word-920-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 740/12323 [1:15:28<19:41:15,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 740/12323 [1:15:28<19:41:15,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 456K Sep  1 14:53 gen-word-925-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 741/12323 [1:15:34<19:41:14,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 741/12323 [1:15:34<19:41:14,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 461K Sep  1 14:53 gen-word-930-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 742/12323 [1:15:40<19:41:13,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 742/12323 [1:15:40<19:41:13,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 453K Sep  1 14:53 gen-word-935-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 743/12323 [1:15:49<19:41:43,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 743/12323 [1:15:49<19:41:43,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 461K Sep  1 14:53 gen-word-940-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 744/12323 [1:15:54<19:41:18,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 744/12323 [1:15:54<19:41:18,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 461K Sep  1 14:53 gen-word-945-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 745/12323 [1:15:58<19:40:37,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 745/12323 [1:15:58<19:40:37,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  50K Sep  1 14:53 gen-word-95-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 746/12323 [1:16:03<19:40:19,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 746/12323 [1:16:03<19:40:19,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 469K Sep  1 14:53 gen-word-950-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 747/12323 [1:16:09<19:40:18,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 747/12323 [1:16:09<19:40:18,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 461K Sep  1 14:53 gen-word-955-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 748/12323 [1:16:18<19:40:49,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 748/12323 [1:16:18<19:40:49,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 478K Sep  1 14:53 gen-word-960-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 749/12323 [1:16:22<19:40:07,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 749/12323 [1:16:22<19:40:07,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 472K Sep  1 14:53 gen-word-965-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 750/12323 [1:16:26<19:39:27,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 750/12323 [1:16:26<19:39:27,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 484K Sep  1 14:53 gen-word-970-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 751/12323 [1:16:34<19:39:55,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 751/12323 [1:16:34<19:39:55,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 478K Sep  1 14:53 gen-word-975-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 752/12323 [1:16:39<19:39:27,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 752/12323 [1:16:39<19:39:27,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 479K Sep  1 14:53 gen-word-980-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 753/12323 [1:16:44<19:39:08,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 753/12323 [1:16:44<19:39:08,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 482K Sep  1 14:53 gen-word-985-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 754/12323 [1:16:45<19:37:47,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 754/12323 [1:16:45<19:37:47,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 495K Sep  1 14:53 gen-word-990-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 755/12323 [1:16:53<19:38:02,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 755/12323 [1:16:53<19:38:02,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 480K Sep  1 14:53 gen-word-995-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 756/12323 [1:17:00<19:38:13,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 756/12323 [1:17:00<19:38:13,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  52K Sep  1 14:53 shuffle-word-10-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 757/12323 [1:17:08<19:38:43,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 757/12323 [1:17:08<19:38:43,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  29K Sep  1 14:53 shuffle-word-100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 758/12323 [1:17:10<19:37:23,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 758/12323 [1:17:10<19:37:23,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-1000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 759/12323 [1:17:14<19:36:57,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 759/12323 [1:17:14<19:36:57,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  29K Sep  1 14:53 shuffle-word-105-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 760/12323 [1:17:21<19:36:55,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 760/12323 [1:17:21<19:36:55,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-110-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 761/12323 [1:17:28<19:37:08,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 761/12323 [1:17:28<19:37:08,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 519K Sep  1 14:53 shuffle-word-1100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 762/12323 [1:17:37<19:37:35,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 762/12323 [1:17:37<19:37:35,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  29K Sep  1 14:53 shuffle-word-115-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 763/12323 [1:17:40<19:36:46,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 763/12323 [1:17:40<19:36:46,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  29K Sep  1 14:53 shuffle-word-120-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 764/12323 [1:17:44<19:36:04,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 764/12323 [1:17:44<19:36:04,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 522K Sep  1 14:53 shuffle-word-1200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 765/12323 [1:17:45<19:34:52,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 765/12323 [1:17:45<19:34:52,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-125-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 766/12323 [1:17:51<19:34:33,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 766/12323 [1:17:51<19:34:33,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  29K Sep  1 14:53 shuffle-word-130-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 767/12323 [1:17:53<19:33:36,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 767/12323 [1:17:53<19:33:36,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 520K Sep  1 14:53 shuffle-word-1300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 768/12323 [1:18:16<19:37:47,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 768/12323 [1:18:16<19:37:47,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  29K Sep  1 14:53 shuffle-word-135-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 769/12323 [1:18:25<19:38:17,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 769/12323 [1:18:25<19:38:17,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  29K Sep  1 14:53 shuffle-word-140-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 770/12323 [1:18:29<19:37:37,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 770/12323 [1:18:29<19:37:37,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 521K Sep  1 14:53 shuffle-word-1400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 771/12323 [1:18:33<19:36:58,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 771/12323 [1:18:33<19:36:58,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-145-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 772/12323 [1:18:36<19:36:03,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 772/12323 [1:18:36<19:36:03,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  41K Sep  1 14:53 shuffle-word-15-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 773/12323 [1:18:40<19:35:38,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 773/12323 [1:18:40<19:35:38,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-150-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 774/12323 [1:18:44<19:34:58,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 774/12323 [1:18:44<19:34:58,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 520K Sep  1 14:53 shuffle-word-1500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 775/12323 [1:18:53<19:35:26,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 775/12323 [1:18:53<19:35:26,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-155-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 776/12323 [1:18:54<19:34:16,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 776/12323 [1:18:54<19:34:16,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-160-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 777/12323 [1:18:59<19:33:50,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 777/12323 [1:18:59<19:33:50,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 520K Sep  1 14:53 shuffle-word-1600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 778/12323 [1:19:03<19:33:03,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 778/12323 [1:19:03<19:33:03,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-165-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 779/12323 [1:19:09<19:33:00,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 779/12323 [1:19:09<19:33:00,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-170-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 780/12323 [1:19:11<19:31:50,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 780/12323 [1:19:11<19:31:50,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 520K Sep  1 14:53 shuffle-word-1700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 781/12323 [1:19:19<19:32:17,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 781/12323 [1:19:19<19:32:17,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-175-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 782/12323 [1:19:24<19:31:58,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-180-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 782/12323 [1:19:24<19:31:58,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 518K Sep  1 14:53 shuffle-word-1800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 783/12323 [1:19:29<19:31:40,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 783/12323 [1:19:29<19:31:40,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-185-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 784/12323 [1:19:38<19:32:05,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 784/12323 [1:19:38<19:32:05,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-190-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 785/12323 [1:19:44<19:32:01,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 785/12323 [1:19:44<19:32:01,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 521K Sep  1 14:53 shuffle-word-1900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 786/12323 [1:19:51<19:32:13,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 786/12323 [1:19:51<19:32:13,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-195-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 787/12323 [1:19:56<19:31:49,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 787/12323 [1:19:56<19:31:49,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  39K Sep  1 14:53 shuffle-word-20-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 788/12323 [1:19:58<19:30:39,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 788/12323 [1:19:58<19:30:39,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  29K Sep  1 14:53 shuffle-word-200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 789/12323 [1:20:02<19:30:07,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 789/12323 [1:20:02<19:30:07,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 521K Sep  1 14:53 shuffle-word-2000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 790/12323 [1:20:07<19:29:36,  6.08s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 790/12323 [1:20:07<19:29:36,  6.08s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-205-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 791/12323 [1:20:13<19:29:35,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 791/12323 [1:20:13<19:29:35,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-210-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 792/12323 [1:20:21<19:30:03,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 792/12323 [1:20:21<19:30:03,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 520K Sep  1 14:53 shuffle-word-2100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 793/12323 [1:20:24<19:29:09,  6.08s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 793/12323 [1:20:24<19:29:09,  6.08s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-215-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 794/12323 [1:20:33<19:29:37,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 794/12323 [1:20:33<19:29:37,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-220-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 795/12323 [1:20:38<19:29:19,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 795/12323 [1:20:38<19:29:19,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 521K Sep  1 14:53 shuffle-word-2200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 796/12323 [1:20:41<19:28:32,  6.08s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 796/12323 [1:20:41<19:28:32,  6.08s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  29K Sep  1 14:53 shuffle-word-225-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 797/12323 [1:20:44<19:27:44,  6.08s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 797/12323 [1:20:44<19:27:44,  6.08s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-230-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 798/12323 [1:20:47<19:26:50,  6.07s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 798/12323 [1:20:47<19:26:50,  6.07s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 521K Sep  1 14:53 shuffle-word-2300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 799/12323 [1:20:51<19:26:10,  6.07s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 799/12323 [1:20:51<19:26:10,  6.07s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-235-count.jsonl\n"
+      "\r",
+      "Epoch 0:   6%| | 800/12323 [1:21:41<19:36:47,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 800/12323 [1:21:41<19:36:47,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-240-count.jsonl\n"
+      "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1802: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\r\n",
+      "  warnings.warn(\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 519K Sep  1 14:53 shuffle-word-2400-count.jsonl\n"
+      "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1802: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\r\n",
+      "  warnings.warn(\r\n",
+      "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1802: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\r\n",
+      "  warnings.warn(\r\n",
+      "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1802: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\r\n",
+      "  warnings.warn(\r\n",
+      "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1802: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\r\n",
+      "  warnings.warn(\r\n",
+      "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1802: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\r\n",
+      "  warnings.warn(\r\n",
+      "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1802: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\r\n",
+      "  warnings.warn(\r\n",
+      "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1802: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\r\n",
+      "  warnings.warn(\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-245-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 801/12323 [1:21:59<19:39:25,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 801/12323 [1:21:59<19:39:25,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  37K Sep  1 14:53 shuffle-word-25-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 802/12323 [1:22:07<19:39:39,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 802/12323 [1:22:07<19:39:39,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-250-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 803/12323 [1:22:15<19:40:06,  6.15s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 803/12323 [1:22:15<19:40:06,  6.15s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 519K Sep  1 14:53 shuffle-word-2500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 804/12323 [1:22:24<19:40:34,  6.15s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 804/12323 [1:22:24<19:40:34,  6.15s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-255-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 805/12323 [1:22:28<19:40:02,  6.15s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 805/12323 [1:22:28<19:40:02,  6.15s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-260-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 806/12323 [1:22:36<19:40:28,  6.15s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 806/12323 [1:22:36<19:40:28,  6.15s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 516K Sep  1 14:53 shuffle-word-2600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 807/12323 [1:22:38<19:39:19,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 807/12323 [1:22:38<19:39:19,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-265-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 808/12323 [1:22:43<19:39:01,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 808/12323 [1:22:43<19:39:01,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-270-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 809/12323 [1:22:48<19:38:30,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 809/12323 [1:22:48<19:38:30,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 511K Sep  1 14:53 shuffle-word-2700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 810/12323 [1:22:49<19:37:15,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 810/12323 [1:22:49<19:37:15,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-275-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 811/12323 [1:22:55<19:37:12,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 811/12323 [1:22:55<19:37:12,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-280-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 812/12323 [1:23:02<19:37:09,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 812/12323 [1:23:02<19:37:09,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-2800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 813/12323 [1:23:09<19:37:19,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 813/12323 [1:23:09<19:37:19,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-285-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 814/12323 [1:23:14<19:37:03,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 814/12323 [1:23:14<19:37:03,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-290-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 815/12323 [1:23:19<19:36:38,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 815/12323 [1:23:19<19:36:38,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-2900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 816/12323 [1:23:22<19:35:39,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 816/12323 [1:23:22<19:35:39,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-295-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 817/12323 [1:23:28<19:35:37,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 817/12323 [1:23:28<19:35:37,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  35K Sep  1 14:53 shuffle-word-30-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 818/12323 [1:23:31<19:34:52,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 818/12323 [1:23:31<19:34:52,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 819/12323 [1:23:36<19:34:22,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 819/12323 [1:23:36<19:34:22,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-3000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 820/12323 [1:23:41<19:33:58,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 820/12323 [1:23:41<19:33:58,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-305-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 821/12323 [1:23:42<19:32:44,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 821/12323 [1:23:42<19:32:44,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-310-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 822/12323 [1:23:51<19:33:11,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 822/12323 [1:23:51<19:33:11,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-3100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 823/12323 [1:23:55<19:32:48,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 823/12323 [1:23:55<19:32:48,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-315-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 824/12323 [1:24:01<19:32:32,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 824/12323 [1:24:01<19:32:32,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-320-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 825/12323 [1:24:06<19:32:09,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 825/12323 [1:24:06<19:32:09,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-3200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 826/12323 [1:24:10<19:31:31,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 826/12323 [1:24:10<19:31:31,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-325-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 827/12323 [1:24:14<19:31:01,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 827/12323 [1:24:14<19:31:01,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-330-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 828/12323 [1:24:19<19:30:45,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 828/12323 [1:24:19<19:30:45,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-3300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 829/12323 [1:24:24<19:30:16,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 829/12323 [1:24:24<19:30:16,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-335-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 830/12323 [1:24:32<19:30:43,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 830/12323 [1:24:32<19:30:43,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-340-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 831/12323 [1:24:37<19:30:21,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 831/12323 [1:24:37<19:30:21,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-3400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 832/12323 [1:25:07<19:35:46,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 832/12323 [1:25:07<19:35:46,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-345-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 833/12323 [1:25:12<19:35:23,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 833/12323 [1:25:12<19:35:23,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  33K Sep  1 14:53 shuffle-word-35-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 834/12323 [1:25:16<19:34:38,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 834/12323 [1:25:16<19:34:38,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-350-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 835/12323 [1:25:21<19:34:20,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 835/12323 [1:25:21<19:34:20,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-3500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 836/12323 [1:25:28<19:34:31,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 836/12323 [1:25:28<19:34:31,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-355-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 837/12323 [1:25:32<19:33:53,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 837/12323 [1:25:32<19:33:53,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-360-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 838/12323 [1:25:38<19:33:50,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 838/12323 [1:25:38<19:33:50,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-3600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 839/12323 [1:25:46<19:34:00,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 839/12323 [1:25:46<19:34:00,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-365-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 840/12323 [1:25:53<19:34:10,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 840/12323 [1:25:53<19:34:10,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-370-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 841/12323 [1:25:56<19:33:26,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 841/12323 [1:25:56<19:33:26,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-3700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 842/12323 [1:25:58<19:32:13,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 842/12323 [1:25:58<19:32:13,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-375-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 843/12323 [1:26:05<19:32:24,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 843/12323 [1:26:05<19:32:24,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-380-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 844/12323 [1:26:12<19:32:22,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 844/12323 [1:26:12<19:32:22,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-3800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 845/12323 [1:26:19<19:32:34,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 845/12323 [1:26:19<19:32:34,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-385-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 846/12323 [1:26:25<19:32:30,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 846/12323 [1:26:25<19:32:30,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-390-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 847/12323 [1:26:30<19:32:07,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 847/12323 [1:26:30<19:32:07,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-3900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 848/12323 [1:26:37<19:32:17,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 848/12323 [1:26:37<19:32:17,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-395-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 849/12323 [1:26:42<19:31:46,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 849/12323 [1:26:42<19:31:46,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  31K Sep  1 14:53 shuffle-word-40-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 850/12323 [1:26:46<19:31:16,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 850/12323 [1:26:46<19:31:16,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 851/12323 [1:26:49<19:30:31,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 851/12323 [1:26:49<19:30:31,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-4000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 852/12323 [1:26:53<19:29:53,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 852/12323 [1:26:53<19:29:53,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-405-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 853/12323 [1:26:59<19:29:38,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 853/12323 [1:26:59<19:29:38,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-410-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 854/12323 [1:27:04<19:29:22,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 854/12323 [1:27:04<19:29:22,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-4100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 855/12323 [1:27:06<19:28:25,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 855/12323 [1:27:06<19:28:25,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-415-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 856/12323 [1:27:15<19:28:48,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 856/12323 [1:27:15<19:28:48,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-420-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 857/12323 [1:27:23<19:29:12,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 857/12323 [1:27:23<19:29:12,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-4200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 858/12323 [1:27:31<19:29:29,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 858/12323 [1:27:31<19:29:29,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-425-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 859/12323 [1:27:34<19:28:38,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 859/12323 [1:27:34<19:28:38,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-430-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 860/12323 [1:27:41<19:28:49,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 860/12323 [1:27:41<19:28:49,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-4300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 861/12323 [1:27:49<19:29:13,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 861/12323 [1:27:49<19:29:13,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-435-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 862/12323 [1:27:55<19:28:57,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 862/12323 [1:27:55<19:28:57,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-440-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 863/12323 [1:28:03<19:29:20,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 863/12323 [1:28:03<19:29:20,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-4400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 864/12323 [1:28:22<19:32:08,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 864/12323 [1:28:22<19:32:08,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-445-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 865/12323 [1:28:28<19:31:51,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 865/12323 [1:28:28<19:31:51,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  34K Sep  1 14:53 shuffle-word-45-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 866/12323 [1:28:30<19:31:01,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 866/12323 [1:28:30<19:31:01,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-450-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 867/12323 [1:28:35<19:30:37,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 867/12323 [1:28:35<19:30:37,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-4500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 868/12323 [1:28:38<19:29:47,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 868/12323 [1:28:38<19:29:47,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-455-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 869/12323 [1:28:40<19:28:44,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 869/12323 [1:28:40<19:28:44,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-460-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 870/12323 [1:28:48<19:29:09,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 870/12323 [1:28:48<19:29:09,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-4600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 871/12323 [1:28:56<19:29:30,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 871/12323 [1:28:56<19:29:30,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-465-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 872/12323 [1:28:58<19:28:26,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 872/12323 [1:28:58<19:28:26,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-470-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 873/12323 [1:29:01<19:27:43,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 873/12323 [1:29:01<19:27:43,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-4700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 874/12323 [1:29:09<19:27:51,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 874/12323 [1:29:09<19:27:51,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-475-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 875/12323 [1:29:15<19:27:47,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 875/12323 [1:29:15<19:27:47,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-480-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 876/12323 [1:29:23<19:28:08,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 876/12323 [1:29:23<19:28:08,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-4800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 877/12323 [1:29:24<19:26:58,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 877/12323 [1:29:24<19:26:58,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-485-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 878/12323 [1:29:27<19:26:01,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 878/12323 [1:29:27<19:26:01,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-490-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 879/12323 [1:29:35<19:26:24,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 879/12323 [1:29:35<19:26:24,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-4900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 880/12323 [1:29:40<19:26:08,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 880/12323 [1:29:40<19:26:08,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-495-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 881/12323 [1:29:43<19:25:18,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 881/12323 [1:29:43<19:25:18,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  81K Sep  1 14:53 shuffle-word-5-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 882/12323 [1:29:47<19:24:41,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 882/12323 [1:29:47<19:24:41,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  31K Sep  1 14:53 shuffle-word-50-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 883/12323 [1:29:52<19:24:17,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 883/12323 [1:29:52<19:24:17,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 884/12323 [1:29:56<19:23:54,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 884/12323 [1:29:56<19:23:54,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-5000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 885/12323 [1:30:03<19:23:50,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 885/12323 [1:30:03<19:23:50,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-505-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 886/12323 [1:30:04<19:22:48,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 886/12323 [1:30:04<19:22:48,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-510-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 887/12323 [1:30:09<19:22:25,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 887/12323 [1:30:09<19:22:25,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-5100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 888/12323 [1:30:14<19:22:10,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 888/12323 [1:30:14<19:22:10,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-515-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 889/12323 [1:30:23<19:22:34,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 889/12323 [1:30:23<19:22:34,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-520-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 890/12323 [1:30:31<19:22:55,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 890/12323 [1:30:31<19:22:55,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-5200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 891/12323 [1:30:37<19:22:52,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 891/12323 [1:30:37<19:22:52,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-525-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 892/12323 [1:30:42<19:22:22,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 892/12323 [1:30:42<19:22:22,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-530-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 893/12323 [1:30:43<19:21:14,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 893/12323 [1:30:43<19:21:14,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-5300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 894/12323 [1:30:48<19:20:52,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 894/12323 [1:30:48<19:20:52,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-535-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 895/12323 [1:30:53<19:20:36,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 895/12323 [1:30:53<19:20:36,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-540-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 896/12323 [1:31:35<19:28:11,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 896/12323 [1:31:35<19:28:11,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-5400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 897/12323 [1:31:38<19:27:16,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 897/12323 [1:31:38<19:27:16,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-545-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 898/12323 [1:31:41<19:26:28,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 898/12323 [1:31:41<19:26:28,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  30K Sep  1 14:53 shuffle-word-55-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 899/12323 [1:31:48<19:26:39,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 899/12323 [1:31:48<19:26:39,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-550-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 900/12323 [1:31:51<19:25:58,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 900/12323 [1:31:51<19:25:58,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-5500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 901/12323 [1:31:57<19:25:42,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 901/12323 [1:31:57<19:25:42,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-555-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 902/12323 [1:32:05<19:26:05,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 902/12323 [1:32:05<19:26:05,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-560-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 903/12323 [1:32:09<19:25:30,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 903/12323 [1:32:09<19:25:30,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-5600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 904/12323 [1:32:15<19:25:27,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 904/12323 [1:32:15<19:25:27,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-565-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 905/12323 [1:32:19<19:24:52,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 905/12323 [1:32:19<19:24:53,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-570-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 906/12323 [1:32:21<19:23:52,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 906/12323 [1:32:21<19:23:52,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-5700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 907/12323 [1:32:30<19:24:16,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 907/12323 [1:32:30<19:24:16,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-575-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 908/12323 [1:32:36<19:24:14,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 908/12323 [1:32:36<19:24:14,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-580-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 909/12323 [1:32:41<19:23:53,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 909/12323 [1:32:41<19:23:53,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-5800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 910/12323 [1:32:46<19:23:30,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 910/12323 [1:32:46<19:23:30,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-585-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 911/12323 [1:32:50<19:22:55,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 911/12323 [1:32:50<19:22:55,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-590-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 912/12323 [1:32:52<19:22:01,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 912/12323 [1:32:52<19:22:01,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-5900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 913/12323 [1:32:59<19:22:11,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 913/12323 [1:32:59<19:22:11,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-595-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 914/12323 [1:33:06<19:22:10,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 914/12323 [1:33:06<19:22:10,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  29K Sep  1 14:53 shuffle-word-60-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 915/12323 [1:33:14<19:22:32,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 915/12323 [1:33:14<19:22:32,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 916/12323 [1:33:20<19:22:18,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 916/12323 [1:33:20<19:22:18,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-6000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 917/12323 [1:33:23<19:21:43,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 917/12323 [1:33:23<19:21:43,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-605-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 918/12323 [1:33:32<19:22:07,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 918/12323 [1:33:32<19:22:07,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-610-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 919/12323 [1:33:37<19:21:45,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 919/12323 [1:33:37<19:21:45,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-6100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 920/12323 [1:33:40<19:21:04,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 920/12323 [1:33:40<19:21:04,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-615-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 921/12323 [1:33:45<19:20:42,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 921/12323 [1:33:45<19:20:42,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-620-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 922/12323 [1:33:48<19:19:56,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 922/12323 [1:33:48<19:19:56,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-6200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 923/12323 [1:33:55<19:20:08,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 923/12323 [1:33:55<19:20:08,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-625-count.jsonl\n"
+      "\r",
+      "Epoch 0:   7%| | 924/12323 [1:34:04<19:20:30,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 924/12323 [1:34:04<19:20:30,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-630-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 925/12323 [1:34:09<19:20:10,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 925/12323 [1:34:09<19:20:10,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-6300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 926/12323 [1:34:15<19:20:07,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 926/12323 [1:34:15<19:20:07,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-635-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 927/12323 [1:34:21<19:20:04,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 927/12323 [1:34:21<19:20:04,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-640-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 928/12323 [1:34:43<19:23:02,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 928/12323 [1:34:43<19:23:02,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-6400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 929/12323 [1:34:45<19:22:09,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 929/12323 [1:34:45<19:22:09,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-645-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 930/12323 [1:34:49<19:21:36,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 930/12323 [1:34:49<19:21:36,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  30K Sep  1 14:53 shuffle-word-65-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 931/12323 [1:34:56<19:21:48,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 931/12323 [1:34:56<19:21:48,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-650-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 932/12323 [1:35:03<19:21:46,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 932/12323 [1:35:03<19:21:46,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-6500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 933/12323 [1:35:07<19:21:12,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 933/12323 [1:35:07<19:21:12,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-655-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 934/12323 [1:35:14<19:21:23,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 934/12323 [1:35:14<19:21:23,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-660-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 935/12323 [1:35:23<19:21:45,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 935/12323 [1:35:23<19:21:45,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-6600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 936/12323 [1:35:24<19:20:40,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 936/12323 [1:35:24<19:20:40,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-665-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 937/12323 [1:35:29<19:20:20,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 937/12323 [1:35:29<19:20:20,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-670-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 938/12323 [1:35:36<19:20:31,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 938/12323 [1:35:36<19:20:31,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-6700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 939/12323 [1:35:43<19:20:28,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 939/12323 [1:35:43<19:20:28,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-675-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 940/12323 [1:35:49<19:20:24,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 940/12323 [1:35:49<19:20:24,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-680-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 941/12323 [1:35:54<19:20:02,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 941/12323 [1:35:54<19:20:02,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-6800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 942/12323 [1:35:59<19:19:39,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 942/12323 [1:35:59<19:19:39,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-685-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 943/12323 [1:36:02<19:19:05,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 943/12323 [1:36:02<19:19:05,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-690-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 944/12323 [1:36:08<19:18:49,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 944/12323 [1:36:08<19:18:49,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-6900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 945/12323 [1:36:14<19:18:46,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 945/12323 [1:36:14<19:18:46,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-695-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 946/12323 [1:36:15<19:17:42,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 946/12323 [1:36:15<19:17:42,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  30K Sep  1 14:53 shuffle-word-70-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 947/12323 [1:36:24<19:18:02,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 947/12323 [1:36:24<19:18:02,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 948/12323 [1:36:32<19:18:23,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 948/12323 [1:36:32<19:18:23,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-7000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 949/12323 [1:36:40<19:18:43,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 949/12323 [1:36:40<19:18:43,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-705-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 950/12323 [1:36:47<19:18:40,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 950/12323 [1:36:47<19:18:40,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-710-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 951/12323 [1:36:49<19:17:53,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 951/12323 [1:36:49<19:17:53,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-7100-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 952/12323 [1:36:55<19:17:38,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 952/12323 [1:36:55<19:17:38,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-715-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 953/12323 [1:36:59<19:17:10,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 953/12323 [1:36:59<19:17:10,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-720-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 954/12323 [1:37:06<19:17:20,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 954/12323 [1:37:06<19:17:20,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-7200-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 955/12323 [1:37:10<19:16:47,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 955/12323 [1:37:10<19:16:47,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-725-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 956/12323 [1:37:12<19:15:49,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 956/12323 [1:37:12<19:15:49,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-730-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 957/12323 [1:37:16<19:15:17,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 957/12323 [1:37:16<19:15:17,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-7300-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 958/12323 [1:37:24<19:15:39,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-735-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 958/12323 [1:37:24<19:15:39,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-740-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 959/12323 [1:37:26<19:14:36,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 959/12323 [1:37:26<19:14:36,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-7400-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 960/12323 [1:37:54<19:18:49,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 960/12323 [1:37:54<19:18:49,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-745-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 961/12323 [1:37:58<19:18:16,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 961/12323 [1:37:58<19:18:16,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  30K Sep  1 14:53 shuffle-word-75-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 962/12323 [1:38:06<19:18:37,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 962/12323 [1:38:06<19:18:37,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-750-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 963/12323 [1:38:12<19:18:34,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 963/12323 [1:38:12<19:18:34,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-7500-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 964/12323 [1:38:21<19:18:53,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 964/12323 [1:38:21<19:18:53,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-755-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 965/12323 [1:38:28<19:19:02,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 965/12323 [1:38:28<19:19:02,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-760-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 966/12323 [1:38:33<19:18:48,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 966/12323 [1:38:33<19:18:48,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-7600-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 967/12323 [1:38:35<19:17:45,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 967/12323 [1:38:35<19:17:45,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-765-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 968/12323 [1:38:43<19:18:07,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 968/12323 [1:38:43<19:18:07,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-770-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 969/12323 [1:38:51<19:18:16,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 969/12323 [1:38:51<19:18:16,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-7700-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 970/12323 [1:38:56<19:18:00,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 970/12323 [1:38:56<19:18:00,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-775-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 971/12323 [1:39:03<19:18:07,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 971/12323 [1:39:03<19:18:07,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-780-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 972/12323 [1:39:11<19:18:27,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 972/12323 [1:39:11<19:18:27,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-7800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 973/12323 [1:39:20<19:18:46,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 973/12323 [1:39:20<19:18:46,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-785-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 974/12323 [1:39:25<19:18:31,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 974/12323 [1:39:25<19:18:31,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-790-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 975/12323 [1:39:28<19:17:52,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 975/12323 [1:39:28<19:17:52,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-7900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 976/12323 [1:39:35<19:17:48,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 976/12323 [1:39:35<19:17:48,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-795-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 977/12323 [1:39:43<19:18:09,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 977/12323 [1:39:43<19:18:09,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  30K Sep  1 14:53 shuffle-word-80-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 978/12323 [1:39:50<19:18:05,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 978/12323 [1:39:50<19:18:05,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-800-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 979/12323 [1:39:55<19:17:50,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 979/12323 [1:39:55<19:17:50,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-8000-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 980/12323 [1:39:56<19:16:48,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 980/12323 [1:39:56<19:16:48,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-805-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 981/12323 [1:39:57<19:15:46,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 981/12323 [1:39:57<19:15:46,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-810-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 982/12323 [1:40:04<19:15:42,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 982/12323 [1:40:04<19:15:42,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-815-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 983/12323 [1:40:11<19:15:50,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 983/12323 [1:40:11<19:15:50,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-820-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 984/12323 [1:40:14<19:15:11,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 984/12323 [1:40:14<19:15:11,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-825-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 985/12323 [1:40:17<19:14:26,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 985/12323 [1:40:17<19:14:26,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-830-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 986/12323 [1:40:23<19:14:23,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 986/12323 [1:40:23<19:14:23,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-835-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 987/12323 [1:40:31<19:14:31,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 987/12323 [1:40:31<19:14:31,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-840-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 988/12323 [1:40:34<19:13:46,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 988/12323 [1:40:34<19:13:46,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-845-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 989/12323 [1:40:35<19:12:51,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 989/12323 [1:40:35<19:12:51,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  30K Sep  1 14:53 shuffle-word-85-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 990/12323 [1:40:41<19:12:35,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 990/12323 [1:40:41<19:12:35,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-850-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 991/12323 [1:40:49<19:12:54,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 991/12323 [1:40:49<19:12:54,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-855-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 992/12323 [1:41:05<19:14:47,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 992/12323 [1:41:05<19:14:47,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-860-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 993/12323 [1:41:13<19:14:56,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 993/12323 [1:41:13<19:14:56,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-865-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 994/12323 [1:41:21<19:15:16,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 994/12323 [1:41:21<19:15:16,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-870-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 995/12323 [1:41:29<19:15:24,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 995/12323 [1:41:29<19:15:24,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-875-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 996/12323 [1:41:30<19:14:23,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 996/12323 [1:41:30<19:14:23,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-880-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 997/12323 [1:41:35<19:14:09,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 997/12323 [1:41:35<19:14:09,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-885-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 998/12323 [1:41:42<19:14:07,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 998/12323 [1:41:42<19:14:07,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-890-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 999/12323 [1:41:48<19:14:05,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 999/12323 [1:41:48<19:14:05,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-895-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 1000/12323 [1:41:55<19:14:01,  6.12s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1000/12323 [1:41:55<19:14:01,  6.12s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  29K Sep  1 14:53 shuffle-word-90-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 1001/12323 [1:42:02<19:14:11,  6.12s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1001/12323 [1:42:02<19:14:11,  6.12s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-900-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 1002/12323 [1:42:11<19:14:30,  6.12s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1002/12323 [1:42:11<19:14:30,  6.12s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-905-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 1003/12323 [1:42:13<19:13:41,  6.12s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1003/12323 [1:42:13<19:13:41,  6.12s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-910-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 1004/12323 [1:42:20<19:13:51,  6.12s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1004/12323 [1:42:20<19:13:51,  6.12s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-915-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 1005/12323 [1:42:23<19:13:02,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1005/12323 [1:42:23<19:13:02,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-920-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 1006/12323 [1:42:28<19:12:42,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1006/12323 [1:42:28<19:12:42,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-925-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 1007/12323 [1:42:32<19:12:16,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1007/12323 [1:42:32<19:12:16,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-930-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 1008/12323 [1:42:36<19:11:50,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-935-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 1008/12323 [1:42:36<19:11:50,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-940-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 1009/12323 [1:42:45<19:12:11,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1009/12323 [1:42:45<19:12:11,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-945-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 1010/12323 [1:42:53<19:12:32,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1010/12323 [1:42:53<19:12:32,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  29K Sep  1 14:53 shuffle-word-95-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 1011/12323 [1:43:00<19:12:29,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1011/12323 [1:43:00<19:12:29,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-950-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 1012/12323 [1:43:03<19:11:52,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1012/12323 [1:43:03<19:11:52,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-955-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 1013/12323 [1:43:10<19:11:50,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1013/12323 [1:43:10<19:11:50,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-960-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 1014/12323 [1:43:14<19:11:30,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1014/12323 [1:43:14<19:11:30,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-965-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 1015/12323 [1:43:16<19:10:36,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1015/12323 [1:43:16<19:10:36,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-970-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 1016/12323 [1:43:19<19:09:53,  6.10s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1016/12323 [1:43:19<19:09:53,  6.10s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-975-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 1017/12323 [1:43:26<19:10:02,  6.10s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1017/12323 [1:43:26<19:10:02,  6.10s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-980-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 1018/12323 [1:43:31<19:09:43,  6.10s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1018/12323 [1:43:31<19:09:43,  6.10s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-985-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 1019/12323 [1:43:35<19:09:12,  6.10s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1019/12323 [1:43:35<19:09:12,  6.10s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-990-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 1020/12323 [1:43:44<19:09:33,  6.10s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1020/12323 [1:43:44<19:09:33,  6.10s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-995-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 1021/12323 [1:43:50<19:09:31,  6.10s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1021/12323 [1:43:50<19:09:31,  6.10s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 6.0K Sep  1 14:53 word-2-count.jsonl\n"
+      "\r",
+      "Epoch 0:   8%| | 1022/12323 [1:43:58<19:09:40,  6.10s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1022/12323 [1:43:58<19:09:40,  6.10s/it, v_num=i2o7, train/loss"
      ]
-    }
-   ],
-   "source": [
-    "%%script bash\n",
-    "\n",
-    "########################################\n",
-    "# Generate the required jsonl dataset\n",
-    "########################################\n",
-    "\n",
-    "# Reset the dataset dir\n",
-    "mkdir -p ../dataset\n",
-    "rm -rf ../dataset/*.jsonl\n",
-    "\n",
-    "# Generate the various datasets\n",
-    "echo \"## Generating word reptition dataset ##\"\n",
-    "\n",
-    "#\n",
-    "# We reduce the training set for < 50 words - and shift the focus upwards\n",
-    "# (aka 50-100 token * 2 : ~100 - 250 token ctx len)\n",
-    "#\n",
-    "python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-2-count.jsonl 2 50 &\n",
-    "for i in {5..1000..5} \n",
-    "do\n",
-    "    python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 50 & \n",
-    "    python ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 1 & \n",
-    "done\n",
-    "\n",
-    "#\n",
-    "# Ramping up the 50+ - 4200 words dataset\n",
-    "# \n",
-    "for i in {1100..8000..100} \n",
-    "do\n",
-    "    python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 2000 & \n",
-    "    python ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 20 & \n",
-    "done\n",
-    "\n",
-    "wait\n",
-    "echo \"## Done ##\"\n",
-    "\n",
-    "ls -lh ../dataset/"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "id": "ead7aedd",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-09-01T14:53:18.175195Z",
-     "iopub.status.busy": "2023-09-01T14:53:18.175015Z",
-     "iopub.status.idle": "2023-09-01T14:53:38.004962Z",
-     "shell.execute_reply": "2023-09-01T14:53:38.004183Z"
-    },
-    "papermill": {
-     "duration": 19.943177,
-     "end_time": "2023-09-01T14:53:38.006545",
-     "exception": false,
-     "start_time": "2023-09-01T14:53:18.063368",
-     "status": "completed"
     },
-    "tags": []
-   },
-   "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Setting ds_accelerator to cuda (auto detect)\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1023/12323 [1:44:03<19:09:19,  6.10s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1023/12323 [1:44:03<19:09:19,  6.10s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1024/12323 [1:44:21<19:11:27,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1024/12323 [1:44:21<19:11:27,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "/usr/local/lib/python3.11/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5-L96-D1024-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-L96-D1024-E0_1-mem-ctx-8k/', '--model.lr_init=3e-4', '--model.lr_final=1e-4', '--data.max_token_size=8192', '--model.ctx_len=4096', '--model.bptt_learning_range=2', '--model.load_model=../model/v5-L96-D1024-E0_1-mem-ctx-4k.pth'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5-L96-D1024-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-L96-D1024-E0_1-mem-ctx-8k/', '--model.lr_init=3e-4', '--model.lr_final=1e-4', '--data.max_token_size=8192', '--model.ctx_len=4096', '--model.bptt_learning_range=2', '--model.load_model=../model/v5-L96-D1024-E0_1-mem-ctx-4k.pth'].\r\n",
-      "  rank_zero_warn(\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1025/12323 [1:44:23<19:10:33,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1025/12323 [1:44:23<19:10:33,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "/usr/local/lib/python3.11/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 3284035444\r\n",
-      "  rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n",
-      "Global seed set to 3284035444\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1026/12323 [1:44:27<19:10:07,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1026/12323 [1:44:27<19:10:07,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1027/12323 [1:44:32<19:09:52,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1027/12323 [1:44:32<19:09:52,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.9\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20230901_145324-1c6n4316\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mv5-L96-D1024-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/1c6n4316\u001b[0m\r\n",
-      "Traceback (most recent call last):\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 254, in <module>\r\n",
-      "    cli_main()\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 233, in cli_main\r\n",
-      "    LightningCLI(\r\n",
-      "  File \"/usr/local/lib/python3.11/dist-packages/lightning/pytorch/cli.py\", line 350, in __init__\r\n",
-      "    self.instantiate_classes()\r\n",
-      "  File \"/usr/local/lib/python3.11/dist-packages/lightning/pytorch/cli.py\", line 499, in instantiate_classes\r\n",
-      "    self.config_init = self.parser.instantiate_classes(self.config)\r\n",
-      "                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
-      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n",
-      "    cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n",
-      "          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
-      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_core.py\", line 1128, in instantiate_classes\r\n",
-      "    cfg[subcommand] = subparser.instantiate_classes(cfg[subcommand], instantiate_groups=instantiate_groups)\r\n",
-      "                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
-      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n",
-      "    cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n",
-      "          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
-      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_core.py\", line 1122, in instantiate_classes\r\n",
-      "    component.instantiate_class(component, cfg)\r\n",
-      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_signatures.py\", line 551, in group_instantiate_class\r\n",
-      "    parent[key] = group.group_class(**value)\r\n",
-      "                  ^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 553, in __init__\r\n",
-      "    raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n",
-      "ValueError: load_model file '../model/v5-L96-D1024-E0_1-mem-ctx-4k.pth' does not exist\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5-L96-D1024-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/1c6n4316\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjkzMjg5ODA3/version_details/v17\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230901_145324-1c6n4316/logs\u001b[0m\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1028/12323 [1:44:40<19:10:09,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1028/12323 [1:44:40<19:10:09,  6.11s/it, v_num=i2o7, train/loss"
      ]
     }
    ],
@@ -8069,55 +35150,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
-   "id": "7ac5ad40",
+   "execution_count": null,
+   "id": "065aea13",
    "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-09-01T14:53:38.115546Z",
-     "iopub.status.busy": "2023-09-01T14:53:38.115356Z",
-     "iopub.status.idle": "2023-09-01T14:53:40.987634Z",
-     "shell.execute_reply": "2023-09-01T14:53:40.986824Z"
-    },
     "papermill": {
-     "duration": 2.928097,
-     "end_time": "2023-09-01T14:53:40.989318",
-     "exception": false,
-     "start_time": "2023-09-01T14:53:38.061221",
-     "status": "completed"
+     "duration": null,
+     "end_time": null,
+     "exception": null,
+     "start_time": null,
+     "status": "pending"
     },
     "tags": []
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Setting ds_accelerator to cuda (auto detect)\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Traceback (most recent call last):\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in <module>\r\n",
-      "    convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n",
-      "    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n",
-      "                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n",
-      "    raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n",
-      "ValueError: Unable to find 'latest' file at ../checkpoint/v5-L96-D1024-E0_1-mem-ctx-8k/last.ckpt/latest\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "ls: cannot access '../model/v5-L96-D1024-E0_1-mem-ctx-8k.pth': No such file or directory\r\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Lets export the model from the checkpoint\n",
     "!cd \"{TRAINER_DIR}\" && \\\n",
@@ -8129,61 +35174,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
-   "id": "912275ec",
+   "execution_count": null,
+   "id": "e497d3dc",
    "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-09-01T14:53:41.101106Z",
-     "iopub.status.busy": "2023-09-01T14:53:41.100914Z",
-     "iopub.status.idle": "2023-09-01T14:53:46.113705Z",
-     "shell.execute_reply": "2023-09-01T14:53:46.112987Z"
-    },
     "papermill": {
-     "duration": 5.068771,
-     "end_time": "2023-09-01T14:53:46.115376",
-     "exception": false,
-     "start_time": "2023-09-01T14:53:41.046605",
-     "status": "completed"
+     "duration": null,
+     "end_time": null,
+     "exception": null,
+     "start_time": null,
+     "status": "pending"
     },
     "tags": []
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Setting ds_accelerator to cuda (auto detect)\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
-      "Traceback (most recent call last):\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-memory/../memory_script/eval_v5_memory_guided.py\", line 366, in <module>\r\n",
-      "    asyncio.run(main_function())\r\n",
-      "  File \"/usr/lib/python3.11/asyncio/runners.py\", line 190, in run\r\n",
-      "    return runner.run(main)\r\n",
-      "           ^^^^^^^^^^^^^^^^\r\n",
-      "  File \"/usr/lib/python3.11/asyncio/runners.py\", line 118, in run\r\n",
-      "    return self._loop.run_until_complete(task)\r\n",
-      "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
-      "  File \"/usr/lib/python3.11/asyncio/base_events.py\", line 653, in run_until_complete\r\n",
-      "    return future.result()\r\n",
-      "           ^^^^^^^^^^^^^^^\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-memory/../memory_script/eval_v5_memory_guided.py\", line 58, in main_function\r\n",
-      "    model = SimpleRWKV(model_path, device=\"cuda\")\r\n",
-      "            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 1378, in __init__\r\n",
-      "    self.model = RWKV(**model_config)\r\n",
-      "                 ^^^^^^^^^^^^^^^^^^^^\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 553, in __init__\r\n",
-      "    raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n",
-      "ValueError: load_model file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/model/v5-L96-D1024-E0_1-mem-ctx-8k.pth' does not exist\r\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Lets do a quick memory test\n",
     "!export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
@@ -8192,61 +35195,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
-   "id": "71ea6284",
+   "execution_count": null,
+   "id": "e5ae95c7",
    "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-09-01T14:53:46.225301Z",
-     "iopub.status.busy": "2023-09-01T14:53:46.225111Z",
-     "iopub.status.idle": "2023-09-01T14:53:51.386811Z",
-     "shell.execute_reply": "2023-09-01T14:53:51.386023Z"
-    },
     "papermill": {
-     "duration": 5.217798,
-     "end_time": "2023-09-01T14:53:51.388460",
-     "exception": false,
-     "start_time": "2023-09-01T14:53:46.170662",
-     "status": "completed"
+     "duration": null,
+     "end_time": null,
+     "exception": null,
+     "start_time": null,
+     "status": "pending"
     },
     "tags": []
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Setting ds_accelerator to cuda (auto detect)\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
-      "Traceback (most recent call last):\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-memory/../memory_script/eval_v5_memory_guided.py\", line 366, in <module>\r\n",
-      "    asyncio.run(main_function())\r\n",
-      "  File \"/usr/lib/python3.11/asyncio/runners.py\", line 190, in run\r\n",
-      "    return runner.run(main)\r\n",
-      "           ^^^^^^^^^^^^^^^^\r\n",
-      "  File \"/usr/lib/python3.11/asyncio/runners.py\", line 118, in run\r\n",
-      "    return self._loop.run_until_complete(task)\r\n",
-      "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
-      "  File \"/usr/lib/python3.11/asyncio/base_events.py\", line 653, in run_until_complete\r\n",
-      "    return future.result()\r\n",
-      "           ^^^^^^^^^^^^^^^\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-memory/../memory_script/eval_v5_memory_guided.py\", line 58, in main_function\r\n",
-      "    model = SimpleRWKV(model_path, device=\"cuda\")\r\n",
-      "            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 1378, in __init__\r\n",
-      "    self.model = RWKV(**model_config)\r\n",
-      "                 ^^^^^^^^^^^^^^^^^^^^\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 553, in __init__\r\n",
-      "    raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n",
-      "ValueError: load_model file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/model/v5-L96-D1024-E0_1-mem-ctx-8k.pth' does not exist\r\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "!export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
     "        python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-8k.pth\" \"none\" 1000 4000"
@@ -8273,14 +35234,14 @@
   },
   "papermill": {
    "default_parameters": {},
-   "duration": 46.4696,
-   "end_time": "2023-09-01T14:53:51.661120",
+   "duration": null,
+   "end_time": null,
    "environment_variables": {},
    "exception": null,
    "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E1e-1-ctx4k-part5.ipynb",
    "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E1e-1-ctx4k-part5.ipynb",
    "parameters": {},
-   "start_time": "2023-09-01T14:53:05.191520",
+   "start_time": "2023-09-02T06:16:27.986890",
    "version": "2.4.0"
   }
  },