diff --git "a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb" "b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb"
--- "a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb"
+++ "b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb"
@@ -3,13 +3,13 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "5efa5cf6",
+   "id": "158fdf2c",
    "metadata": {
     "papermill": {
-     "duration": 0.004315,
-     "end_time": "2023-09-13T21:43:19.771432",
+     "duration": 0.004282,
+     "end_time": "2023-09-14T00:21:48.785213",
      "exception": false,
-     "start_time": "2023-09-13T21:43:19.767117",
+     "start_time": "2023-09-14T00:21:48.780931",
      "status": "completed"
     },
     "tags": []
@@ -25,13 +25,13 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "3f94c3fe",
+   "id": "792a73e5",
    "metadata": {
     "papermill": {
-     "duration": 0.002697,
-     "end_time": "2023-09-13T21:43:19.777118",
+     "duration": 0.002651,
+     "end_time": "2023-09-14T00:21:48.790826",
      "exception": false,
-     "start_time": "2023-09-13T21:43:19.774421",
+     "start_time": "2023-09-14T00:21:48.788175",
      "status": "completed"
     },
     "tags": []
@@ -43,19 +43,19 @@
   {
    "cell_type": "code",
    "execution_count": 1,
-   "id": "4c9a0a0b",
+   "id": "761b91e0",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-13T21:43:19.781369Z",
-     "iopub.status.busy": "2023-09-13T21:43:19.781083Z",
-     "iopub.status.idle": "2023-09-13T21:43:20.655752Z",
-     "shell.execute_reply": "2023-09-13T21:43:20.654921Z"
+     "iopub.execute_input": "2023-09-14T00:21:48.796049Z",
+     "iopub.status.busy": "2023-09-14T00:21:48.795747Z",
+     "iopub.status.idle": "2023-09-14T00:21:49.675797Z",
+     "shell.execute_reply": "2023-09-14T00:21:49.674913Z"
     },
     "papermill": {
-     "duration": 0.87905,
-     "end_time": "2023-09-13T21:43:20.657671",
+     "duration": 0.88445,
+     "end_time": "2023-09-14T00:21:49.677690",
      "exception": false,
-     "start_time": "2023-09-13T21:43:19.778621",
+     "start_time": "2023-09-14T00:21:48.793240",
      "status": "completed"
     },
     "tags": []
@@ -83,19 +83,19 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "id": "3054f355",
+   "id": "d8775637",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-13T21:43:20.664999Z",
-     "iopub.status.busy": "2023-09-13T21:43:20.664744Z",
-     "iopub.status.idle": "2023-09-13T21:43:22.809995Z",
-     "shell.execute_reply": "2023-09-13T21:43:22.809266Z"
+     "iopub.execute_input": "2023-09-14T00:21:49.684909Z",
+     "iopub.status.busy": "2023-09-14T00:21:49.684672Z",
+     "iopub.status.idle": "2023-09-14T00:21:51.806309Z",
+     "shell.execute_reply": "2023-09-14T00:21:51.805544Z"
     },
     "papermill": {
-     "duration": 2.15121,
-     "end_time": "2023-09-13T21:43:22.812140",
+     "duration": 2.127403,
+     "end_time": "2023-09-14T00:21:51.808325",
      "exception": false,
-     "start_time": "2023-09-13T21:43:20.660930",
+     "start_time": "2023-09-14T00:21:49.680922",
      "status": "completed"
     },
     "tags": []
@@ -118,19 +118,19 @@
   {
    "cell_type": "code",
    "execution_count": 3,
-   "id": "cb2a6c86",
+   "id": "bda1d282",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-13T21:43:22.819682Z",
-     "iopub.status.busy": "2023-09-13T21:43:22.819442Z",
-     "iopub.status.idle": "2023-09-13T21:43:22.828417Z",
-     "shell.execute_reply": "2023-09-13T21:43:22.827850Z"
+     "iopub.execute_input": "2023-09-14T00:21:51.816177Z",
+     "iopub.status.busy": "2023-09-14T00:21:51.815928Z",
+     "iopub.status.idle": "2023-09-14T00:21:51.824598Z",
+     "shell.execute_reply": "2023-09-14T00:21:51.823913Z"
     },
     "papermill": {
-     "duration": 0.014595,
-     "end_time": "2023-09-13T21:43:22.830049",
+     "duration": 0.014641,
+     "end_time": "2023-09-14T00:21:51.826277",
      "exception": false,
-     "start_time": "2023-09-13T21:43:22.815454",
+     "start_time": "2023-09-14T00:21:51.811636",
      "status": "completed"
     },
     "tags": []
@@ -157,7 +157,7 @@
     "ENABLE_WANDB=True\n",
     "\n",
     "# Layer count and embed dim to start with\n",
-    "LAYER_COUNT=12\n",
+    "LAYER_COUNT=6\n",
     "EMBED_DIM=2560\n",
     "\n",
     "EMBED_SCALE=0.1\n",
@@ -197,19 +197,19 @@
   {
    "cell_type": "code",
    "execution_count": 4,
-   "id": "9e3a24cf",
+   "id": "951b741e",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-13T21:43:22.837873Z",
-     "iopub.status.busy": "2023-09-13T21:43:22.837614Z",
-     "iopub.status.idle": "2023-09-13T21:43:23.592874Z",
-     "shell.execute_reply": "2023-09-13T21:43:23.592085Z"
+     "iopub.execute_input": "2023-09-14T00:21:51.833577Z",
+     "iopub.status.busy": "2023-09-14T00:21:51.833353Z",
+     "iopub.status.idle": "2023-09-14T00:23:39.537522Z",
+     "shell.execute_reply": "2023-09-14T00:23:39.536676Z"
     },
     "papermill": {
-     "duration": 0.761153,
-     "end_time": "2023-09-13T21:43:23.594701",
+     "duration": 107.709983,
+     "end_time": "2023-09-14T00:23:39.539513",
      "exception": false,
-     "start_time": "2023-09-13T21:43:22.833548",
+     "start_time": "2023-09-14T00:21:51.829530",
      "status": "completed"
     },
     "tags": []
@@ -219,9 +219,16 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "--2023-09-13 21:43:22--  https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L12-D2560-E0_1-mem-ctx-512.pth\r\n",
-      "Resolving huggingface.co (huggingface.co)... 13.33.33.20, 13.33.33.55, 13.33.33.110, ...\r\n",
-      "Connecting to huggingface.co (huggingface.co)|13.33.33.20|:443... connected.\r\n",
+      "--2023-09-14 00:21:51--  https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-512.pth\r\n",
+      "Resolving huggingface.co (huggingface.co)... "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "13.33.33.55, 13.33.33.110, 13.33.33.102, ...\r\n",
+      "Connecting to huggingface.co (huggingface.co)|13.33.33.55|:443... connected.\r\n",
       "HTTP request sent, awaiting response... "
      ]
     },
@@ -229,2904 +236,6634 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "404 Not Found\r\n",
-      "2023-09-13 21:43:23 ERROR 404: Not Found.\r\n",
-      "\r\n"
+      "302 Found\r\n",
+      "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/facd3a8913710e7c17719547c55dcde02826ce2d592626c0339e42b394858498?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L6-D2560-E0_1-mem-ctx-512.pth%3B+filename%3D%22v5r3-L6-D2560-E0_1-mem-ctx-512.pth%22%3B&Expires=1694910112&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDkxMDExMn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkL2ZhY2QzYTg5MTM3MTBlN2MxNzcxOTU0N2M1NWRjZGUwMjgyNmNlMmQ1OTI2MjZjMDMzOWU0MmIzOTQ4NTg0OTg%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=WM4HZnIOKrH24paW4nOk1cHO9YHki8seMtQ6g3vGWI7sYyvPtz%7EXzbI4q%7EME0hvvhjAcVa1%7EUWwlWKF4I1ek7wHZOZ9ySyH0VaZ4HCTI0Zx9XlaT%7E62wMWO854tDrU5iHFVfMP59Rr%7EbQCkiwanrgwg5NC8iCw7uL5t2a-LvME3l0m65K5SzgC-0IEn4nVrXpnvdCmNaBNSNecwoP8yEYIv-0%7E-yeTK0j7dVnIifdmJY6pB4UiIPfOU--LckTIv8c%7EvvUtg4DWRMEspqC%7E%7EOquCGo3OAUgIZVvyhY9nzaWsJbQoRv3DyaWyuh8nKZW%7E8-99fEW8tfubjL3gqQBLqOw__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n",
+      "--2023-09-14 00:21:52--  https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/facd3a8913710e7c17719547c55dcde02826ce2d592626c0339e42b394858498?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L6-D2560-E0_1-mem-ctx-512.pth%3B+filename%3D%22v5r3-L6-D2560-E0_1-mem-ctx-512.pth%22%3B&Expires=1694910112&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDkxMDExMn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkL2ZhY2QzYTg5MTM3MTBlN2MxNzcxOTU0N2M1NWRjZGUwMjgyNmNlMmQ1OTI2MjZjMDMzOWU0MmIzOTQ4NTg0OTg%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=WM4HZnIOKrH24paW4nOk1cHO9YHki8seMtQ6g3vGWI7sYyvPtz%7EXzbI4q%7EME0hvvhjAcVa1%7EUWwlWKF4I1ek7wHZOZ9ySyH0VaZ4HCTI0Zx9XlaT%7E62wMWO854tDrU5iHFVfMP59Rr%7EbQCkiwanrgwg5NC8iCw7uL5t2a-LvME3l0m65K5SzgC-0IEn4nVrXpnvdCmNaBNSNecwoP8yEYIv-0%7E-yeTK0j7dVnIifdmJY6pB4UiIPfOU--LckTIv8c%7EvvUtg4DWRMEspqC%7E%7EOquCGo3OAUgIZVvyhY9nzaWsJbQoRv3DyaWyuh8nKZW%7E8-99fEW8tfubjL3gqQBLqOw__&Key-Pair-Id=KVTP0A1DKRTAX\r\n",
+      "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "total 10K\r\n",
-      "drwxr-xr-x  2 root root  2 Sep 13 21:43 .\r\n",
-      "drwxr-xr-x 20 root root 24 Sep 13 21:43 ..\r\n"
+      "18.155.68.128, 18.155.68.94, 18.155.68.73, ...\r\n",
+      "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|18.155.68.128|:443... connected.\r\n",
+      "HTTP request sent, awaiting response... "
      ]
-    }
-   ],
-   "source": [
-    "# Download the model directly (stop gap till HF sync issues is resolved)\n",
-    "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n",
-    "    wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/{DIR_NAME}/{FILENAME_PREFIX}-mem-ctx-512.pth\"\n",
-    "\n",
-    "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n",
-    "    ls -alh ."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "d046d1e0",
-   "metadata": {
-    "papermill": {
-     "duration": 0.003341,
-     "end_time": "2023-09-13T21:43:23.601885",
-     "exception": false,
-     "start_time": "2023-09-13T21:43:23.598544",
-     "status": "completed"
     },
-    "tags": []
-   },
-   "source": [
-    "## Tune 3 : Ramping up the ctx size (8192), memory training\n",
-    "\n",
-    "- Tune 3: Large ctx size (8192), Scaling up!"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "c2fb4c25",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-09-13T21:43:23.608478Z",
-     "iopub.status.busy": "2023-09-13T21:43:23.608215Z",
-     "iopub.status.idle": "2023-09-13T21:43:47.016389Z",
-     "shell.execute_reply": "2023-09-13T21:43:47.015749Z"
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "200 OK\r\n",
+      "Length: 1537632513 (1.4G) [binary/octet-stream]\r\n",
+      "Saving to: ‘v5r3-L6-D2560-E0_1-mem-ctx-512.pth’\r\n",
+      "\r\n",
+      "\r",
+      "          v5r3-L6-D   0%[                    ]       0  --.-KB/s               "
+     ]
     },
-    "papermill": {
-     "duration": 23.441437,
-     "end_time": "2023-09-13T21:43:47.046834",
-     "exception": false,
-     "start_time": "2023-09-13T21:43:23.605397",
-     "status": "completed"
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "         v5r3-L6-D2   0%[                    ]  18.27K  81.1KB/s               "
+     ]
     },
-    "tags": []
-   },
-   "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Generating word reptition dataset ##\n"
+      "\r",
+      "        v5r3-L6-D25   0%[                    ]  58.27K   129KB/s               "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 20 max words, 100 samples - at ../dataset/gen-word-20-count.jsonl\n"
+      "\r",
+      "       v5r3-L6-D256   0%[                    ] 135.27K   199KB/s               "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 15 max words, 100 samples - at ../dataset/gen-word-15-count.jsonl\n"
+      "\r",
+      "      v5r3-L6-D2560   0%[                    ] 296.27K   327KB/s               "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 50 max words, 100 samples - at ../dataset/gen-word-50-count.jsonl\n"
+      "\r",
+      "     v5r3-L6-D2560-   0%[                    ] 602.27K   532KB/s               "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 75 max words, 100 samples - at ../dataset/gen-word-75-count.jsonl\n"
+      "\r",
+      "    v5r3-L6-D2560-E   0%[                    ]   1.20M   907KB/s               "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 30 max words, 100 samples - at ../dataset/gen-word-30-count.jsonl\n"
+      "\r",
+      "   v5r3-L6-D2560-E0   0%[                    ]   2.42M  1.52MB/s               "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 10 max words, 100 samples - at ../dataset/gen-word-10-count.jsonl\n"
+      "\r",
+      "  v5r3-L6-D2560-E0_   0%[                    ]   4.85M  2.68MB/s               "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 870 samples (10 token repeat) - 30 max words - at ../dataset/shuffle-word-30-count.jsonl\n"
+      "\r",
+      " v5r3-L6-D2560-E0_1   0%[                    ]   8.65M  4.24MB/s               "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 25 max words, 100 samples - at ../dataset/gen-word-25-count.jsonl\n"
+      "\r",
+      "v5r3-L6-D2560-E0_1-   0%[                    ]  12.56M  5.53MB/s               "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 35 max words, 100 samples - at ../dataset/gen-word-35-count.jsonl\n"
+      "\r",
+      "5r3-L6-D2560-E0_1-m   1%[                    ]  16.43M  6.58MB/s               "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 1060 samples (10 token repeat) - 25 max words - at ../dataset/shuffle-word-25-count.jsonl\n"
+      "\r",
+      "r3-L6-D2560-E0_1-me   1%[                    ]  20.21M  7.41MB/s               "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 1766 samples (10 token repeat) - 15 max words - at ../dataset/shuffle-word-15-count.jsonl\n"
+      "\r",
+      "3-L6-D2560-E0_1-mem   1%[                    ]  24.07M  8.14MB/s               "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 55 max words, 100 samples - at ../dataset/gen-word-55-count.jsonl\n"
+      "\r",
+      "-L6-D2560-E0_1-mem-   1%[                    ]  27.84M  8.74MB/s    eta 2m 45s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 95 max words, 100 samples - at ../dataset/gen-word-95-count.jsonl\n"
+      "\r",
+      "L6-D2560-E0_1-mem-c   2%[                    ]  31.63M  9.26MB/s    eta 2m 45s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 40 max words, 100 samples - at ../dataset/gen-word-40-count.jsonl\n"
+      "\r",
+      "6-D2560-E0_1-mem-ct   2%[                    ]  35.51M  9.74MB/s    eta 2m 45s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 80 max words, 100 samples - at ../dataset/gen-word-80-count.jsonl\n"
+      "\r",
+      "-D2560-E0_1-mem-ctx   2%[                    ]  39.34M  10.2MB/s    eta 2m 45s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 60 max words, 100 samples - at ../dataset/gen-word-60-count.jsonl\n"
+      "\r",
+      "D2560-E0_1-mem-ctx-   2%[                    ]  43.20M  10.5MB/s    eta 2m 45s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 374 samples (10 token repeat) - 70 max words - at ../dataset/shuffle-word-70-count.jsonl\n"
+      "\r",
+      "2560-E0_1-mem-ctx-5   3%[                    ]  47.09M  10.9MB/s    eta 2m 11s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 45 max words, 100 samples - at ../dataset/gen-word-45-count.jsonl\n"
+      "\r",
+      "560-E0_1-mem-ctx-51   3%[                    ]  50.88M  11.2MB/s    eta 2m 11s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 763 samples (10 token repeat) - 35 max words - at ../dataset/shuffle-word-35-count.jsonl\n"
+      "\r",
+      "60-E0_1-mem-ctx-512   3%[                    ]  54.60M  12.0MB/s    eta 2m 11s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 348 samples (10 token repeat) - 75 max words - at ../dataset/shuffle-word-75-count.jsonl\n"
+      "\r",
+      "0-E0_1-mem-ctx-512.   3%[                    ]  58.48M  12.8MB/s    eta 2m 11s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3500 max words - at ../dataset/shuffle-word-3500-count.jsonl\n"
+      "\r",
+      "-E0_1-mem-ctx-512.p   4%[                    ]  62.38M  13.6MB/s    eta 2m 11s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 665 samples (10 token repeat) - 40 max words - at ../dataset/shuffle-word-40-count.jsonl\n"
+      "\r",
+      "E0_1-mem-ctx-512.pt   4%[                    ]  66.21M  14.4MB/s    eta 1m 56s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 586 samples (10 token repeat) - 45 max words - at ../dataset/shuffle-word-45-count.jsonl\n"
+      "\r",
+      "0_1-mem-ctx-512.pth   4%[                    ]  70.13M  15.2MB/s    eta 1m 56s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 2613 samples (10 token repeat) - 10 max words - at ../dataset/shuffle-word-10-count.jsonl\n"
+      "\r",
+      "_1-mem-ctx-512.pth    5%[>                   ]  73.90M  15.9MB/s    eta 1m 56s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7500 max words - at ../dataset/shuffle-word-7500-count.jsonl\n"
+      "\r",
+      "1-mem-ctx-512.pth     5%[>                   ]  77.82M  16.5MB/s    eta 1m 56s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 100 samples (20 token repeat) - 600 max words - at ../dataset/shuffle-word-600-count.jsonl\n"
+      "\r",
+      "-mem-ctx-512.pth      5%[>                   ]  81.73M  16.8MB/s    eta 1m 56s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5601 samples (10 token repeat) - 5 max words - at ../dataset/shuffle-word-5-count.jsonl\n"
+      "\r",
+      "mem-ctx-512.pth       5%[>                   ]  85.54M  16.8MB/s    eta 1m 47s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3600 max words - at ../dataset/shuffle-word-3600-count.jsonl\n"
+      "\r",
+      "em-ctx-512.pth        6%[>                   ]  89.46M  16.8MB/s    eta 1m 47s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 51 samples (20 token repeat) - 1300 max words - at ../dataset/shuffle-word-1300-count.jsonl\n"
+      "\r",
+      "m-ctx-512.pth         6%[>                   ]  93.34M  16.8MB/s    eta 1m 47s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1800 max words - at ../dataset/shuffle-word-1800-count.jsonl\n"
+      "\r",
+      "-ctx-512.pth          6%[>                   ]  97.13M  16.8MB/s    eta 1m 47s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1700 max words - at ../dataset/shuffle-word-1700-count.jsonl\n"
+      "\r",
+      "ctx-512.pth           6%[>                   ] 101.04M  16.8MB/s    eta 1m 47s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 60 samples (20 token repeat) - 1200 max words - at ../dataset/shuffle-word-1200-count.jsonl\n"
+      "\r",
+      "tx-512.pth            7%[>                   ] 104.82M  16.8MB/s    eta 1m 41s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 80 samples (20 token repeat) - 700 max words - at ../dataset/shuffle-word-700-count.jsonl\n"
+      "\r",
+      "x-512.pth             7%[>                   ] 108.63M  16.8MB/s    eta 1m 41s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 2100 max words - at ../dataset/shuffle-word-2100-count.jsonl\n"
+      "\r",
+      "-512.pth              7%[>                   ] 112.42M  16.8MB/s    eta 1m 41s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 63 samples (20 token repeat) - 900 max words - at ../dataset/shuffle-word-900-count.jsonl\n"
+      "\r",
+      "512.pth               7%[>                   ] 116.21M  16.8MB/s    eta 1m 41s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 2000 max words - at ../dataset/shuffle-word-2000-count.jsonl\n"
+      "\r",
+      "12.pth                7%[>                   ] 117.20M  16.1MB/s    eta 1m 41s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 2200 max words - at ../dataset/shuffle-word-2200-count.jsonl\n"
+      "\r",
+      "2.pth                 8%[>                   ] 120.17M  15.9MB/s    eta 1m 40s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 38 samples (20 token repeat) - 2500 max words - at ../dataset/shuffle-word-2500-count.jsonl\n"
+      "\r",
+      ".pth                  8%[>                   ] 123.92M  15.9MB/s    eta 1m 40s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1500 max words - at ../dataset/shuffle-word-1500-count.jsonl\n"
+      "\r",
+      "pth                   8%[>                   ] 127.67M  15.9MB/s    eta 1m 40s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4200 max words - at ../dataset/shuffle-word-4200-count.jsonl\n"
+      "\r",
+      "th                    8%[>                   ] 130.81M  15.8MB/s    eta 1m 40s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4900 max words - at ../dataset/shuffle-word-4900-count.jsonl\n"
+      "\r",
+      "h                     9%[>                   ] 134.63M  15.8MB/s    eta 1m 40s "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 257 samples (10 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n"
+      "\r",
+      "                      9%[>                   ] 138.42M  15.8MB/s    eta 97s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5 max words, 100 samples - at ../dataset/gen-word-5-count.jsonl\n"
+      "\r",
+      "                  v   9%[>                   ] 142.31M  15.7MB/s    eta 97s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 185 samples (20 token repeat) - 300 max words - at ../dataset/shuffle-word-300-count.jsonl\n"
+      "\r",
+      "                 v5   9%[>                   ] 145.38M  15.6MB/s    eta 97s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4100 max words - at ../dataset/shuffle-word-4100-count.jsonl\n"
+      "\r",
+      "                v5r  10%[=>                  ] 149.13M  15.6MB/s    eta 97s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 117 samples (20 token repeat) - 500 max words - at ../dataset/shuffle-word-500-count.jsonl\n"
+      "\r",
+      "               v5r3  10%[=>                  ] 152.09M  15.4MB/s    eta 97s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6900 max words - at ../dataset/shuffle-word-6900-count.jsonl\n"
+      "\r",
+      "              v5r3-  10%[=>                  ] 155.88M  15.3MB/s    eta 94s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3000 max words - at ../dataset/shuffle-word-3000-count.jsonl\n"
+      "\r",
+      "             v5r3-L  10%[=>                  ] 159.67M  15.3MB/s    eta 94s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6800 max words - at ../dataset/shuffle-word-6800-count.jsonl\n"
+      "\r",
+      "            v5r3-L6  11%[=>                  ] 162.78M  15.1MB/s    eta 94s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 65 max words, 100 samples - at ../dataset/gen-word-65-count.jsonl\n"
+      "\r",
+      "           v5r3-L6-  11%[=>                  ] 166.62M  15.2MB/s    eta 94s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 70 max words, 100 samples - at ../dataset/gen-word-70-count.jsonl\n"
+      "\r",
+      "          v5r3-L6-D  11%[=>                  ] 170.42M  15.1MB/s    eta 94s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 60 samples (20 token repeat) - 1100 max words - at ../dataset/shuffle-word-1100-count.jsonl\n"
+      "\r",
+      "         v5r3-L6-D2  11%[=>                  ] 174.28M  15.1MB/s    eta 92s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 85 max words, 100 samples - at ../dataset/gen-word-85-count.jsonl\n"
+      "\r",
+      "        v5r3-L6-D25  12%[=>                  ] 178.38M  15.2MB/s    eta 92s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4000 max words - at ../dataset/shuffle-word-4000-count.jsonl\n"
+      "\r",
+      "       v5r3-L6-D256  12%[=>                  ] 182.15M  15.2MB/s    eta 92s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 293 samples (10 token repeat) - 90 max words - at ../dataset/shuffle-word-90-count.jsonl\n"
+      "\r",
+      "      v5r3-L6-D2560  12%[=>                  ] 185.90M  15.2MB/s    eta 92s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 1301 samples (10 token repeat) - 20 max words - at ../dataset/shuffle-word-20-count.jsonl\n"
+      "\r",
+      "     v5r3-L6-D2560-  12%[=>                  ] 189.65M  15.8MB/s    eta 92s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 318 samples (10 token repeat) - 85 max words - at ../dataset/shuffle-word-85-count.jsonl\n"
+      "\r",
+      "    v5r3-L6-D2560-E  13%[=>                  ] 193.63M  16.0MB/s    eta 89s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 100 max words, 100 samples - at ../dataset/gen-word-100-count.jsonl\n"
+      "\r",
+      "   v5r3-L6-D2560-E0  13%[=>                  ] 197.46M  16.0MB/s    eta 89s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 90 max words, 100 samples - at ../dataset/gen-word-90-count.jsonl\n"
+      "\r",
+      "  v5r3-L6-D2560-E0_  13%[=>                  ] 199.09M  15.5MB/s    eta 89s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 8000 max words - at ../dataset/shuffle-word-8000-count.jsonl\n"
+      "\r",
+      " v5r3-L6-D2560-E0_1  13%[=>                  ] 202.45M  15.5MB/s    eta 89s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 330 samples (10 token repeat) - 80 max words - at ../dataset/shuffle-word-80-count.jsonl\n"
+      "\r",
+      "v5r3-L6-D2560-E0_1-  14%[=>                  ] 206.15M  15.6MB/s    eta 89s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6500 max words - at ../dataset/shuffle-word-6500-count.jsonl\n"
+      "\r",
+      "5r3-L6-D2560-E0_1-m  14%[=>                  ] 209.87M  15.5MB/s    eta 88s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 283 samples (10 token repeat) - 95 max words - at ../dataset/shuffle-word-95-count.jsonl\n"
+      "\r",
+      "r3-L6-D2560-E0_1-me  14%[=>                  ] 213.65M  15.5MB/s    eta 88s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6600 max words - at ../dataset/shuffle-word-6600-count.jsonl\n"
+      "\r",
+      "3-L6-D2560-E0_1-mem  14%[=>                  ] 217.40M  15.7MB/s    eta 88s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6400 max words - at ../dataset/shuffle-word-6400-count.jsonl\n"
+      "\r",
+      "-L6-D2560-E0_1-mem-  15%[==>                 ] 221.12M  15.7MB/s    eta 88s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6700 max words - at ../dataset/shuffle-word-6700-count.jsonl\n"
+      "\r",
+      "L6-D2560-E0_1-mem-c  15%[==>                 ] 224.95M  15.8MB/s    eta 88s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7000 max words - at ../dataset/shuffle-word-7000-count.jsonl\n"
+      "\r",
+      "6-D2560-E0_1-mem-ct  15%[==>                 ] 228.85M  15.9MB/s    eta 85s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 60 samples (20 token repeat) - 1000 max words - at ../dataset/shuffle-word-1000-count.jsonl\n"
+      "\r",
+      "-D2560-E0_1-mem-ctx  15%[==>                 ] 232.63M  15.9MB/s    eta 85s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 447 samples (10 token repeat) - 60 max words - at ../dataset/shuffle-word-60-count.jsonl\n"
+      "\r",
+      "D2560-E0_1-mem-ctx-  16%[==>                 ] 236.48M  16.0MB/s    eta 85s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6300 max words - at ../dataset/shuffle-word-6300-count.jsonl\n"
+      "\r",
+      "2560-E0_1-mem-ctx-5  16%[==>                 ] 240.31M  16.0MB/s    eta 85s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 477 samples (10 token repeat) - 55 max words - at ../dataset/shuffle-word-55-count.jsonl\n"
+      "\r",
+      "560-E0_1-mem-ctx-51  16%[==>                 ] 244.15M  16.0MB/s    eta 85s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 408 samples (10 token repeat) - 65 max words - at ../dataset/shuffle-word-65-count.jsonl\n"
+      "\r",
+      "60-E0_1-mem-ctx-512  16%[==>                 ] 247.99M  16.0MB/s    eta 83s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5800 max words - at ../dataset/shuffle-word-5800-count.jsonl\n"
+      "\r",
+      "0-E0_1-mem-ctx-512.  17%[==>                 ] 251.73M  15.9MB/s    eta 83s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7900 max words - at ../dataset/shuffle-word-7900-count.jsonl\n"
+      "\r",
+      "-E0_1-mem-ctx-512.p  17%[==>                 ] 255.59M  16.0MB/s    eta 83s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 80 samples (20 token repeat) - 800 max words - at ../dataset/shuffle-word-800-count.jsonl\n"
+      "\r",
+      "E0_1-mem-ctx-512.pt  17%[==>                 ] 259.38M  16.0MB/s    eta 83s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7800 max words - at ../dataset/shuffle-word-7800-count.jsonl\n"
+      "\r",
+      "0_1-mem-ctx-512.pth  17%[==>                 ] 263.13M  16.0MB/s    eta 83s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 275 samples (20 token repeat) - 200 max words - at ../dataset/shuffle-word-200-count.jsonl\n"
+      "\r",
+      "_1-mem-ctx-512.pth   18%[==>                 ] 266.95M  15.9MB/s    eta 81s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1400 max words - at ../dataset/shuffle-word-1400-count.jsonl\n"
+      "\r",
+      "1-mem-ctx-512.pth    18%[==>                 ] 270.76M  15.9MB/s    eta 81s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 2300 max words - at ../dataset/shuffle-word-2300-count.jsonl\n"
+      "\r",
+      "-mem-ctx-512.pth     18%[==>                 ] 274.51M  16.4MB/s    eta 81s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5000 max words - at ../dataset/shuffle-word-5000-count.jsonl\n"
+      "\r",
+      "mem-ctx-512.pth      18%[==>                 ] 278.38M  16.5MB/s    eta 81s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 531 samples (10 token repeat) - 50 max words - at ../dataset/shuffle-word-50-count.jsonl\n"
+      "\r",
+      "em-ctx-512.pth       19%[==>                 ] 282.12M  16.5MB/s    eta 81s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1600 max words - at ../dataset/shuffle-word-1600-count.jsonl\n"
+      "\r",
+      "m-ctx-512.pth        19%[==>                 ] 285.85M  16.5MB/s    eta 79s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 2800 max words - at ../dataset/shuffle-word-2800-count.jsonl\n"
+      "\r",
+      "-ctx-512.pth         19%[==>                 ] 289.57M  16.5MB/s    eta 79s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7600 max words - at ../dataset/shuffle-word-7600-count.jsonl\n"
+      "\r",
+      "ctx-512.pth          20%[===>                ] 293.38M  16.5MB/s    eta 79s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3800 max words - at ../dataset/shuffle-word-3800-count.jsonl\n"
+      "\r",
+      "tx-512.pth           20%[===>                ] 297.13M  16.6MB/s    eta 79s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5300 max words - at ../dataset/shuffle-word-5300-count.jsonl\n"
+      "\r",
+      "x-512.pth            20%[===>                ] 300.90M  16.5MB/s    eta 79s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 140 samples (20 token repeat) - 400 max words - at ../dataset/shuffle-word-400-count.jsonl\n"
+      "\r",
+      "-512.pth             20%[===>                ] 304.67M  16.5MB/s    eta 78s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4500 max words - at ../dataset/shuffle-word-4500-count.jsonl\n"
+      "\r",
+      "512.pth              21%[===>                ] 308.38M  16.5MB/s    eta 78s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5700 max words - at ../dataset/shuffle-word-5700-count.jsonl\n"
+      "\r",
+      "12.pth               21%[===>                ] 312.13M  16.5MB/s    eta 78s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3900 max words - at ../dataset/shuffle-word-3900-count.jsonl\n"
+      "\r",
+      "2.pth                21%[===>                ] 316.20M  16.5MB/s    eta 78s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5400 max words - at ../dataset/shuffle-word-5400-count.jsonl\n"
+      "\r",
+      ".pth                 21%[===>                ] 319.92M  16.5MB/s    eta 78s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3700 max words - at ../dataset/shuffle-word-3700-count.jsonl\n"
+      "\r",
+      "pth                  22%[===>                ] 323.67M  16.5MB/s    eta 76s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7700 max words - at ../dataset/shuffle-word-7700-count.jsonl\n"
+      "\r",
+      "th                   22%[===>                ] 327.51M  16.5MB/s    eta 76s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3300 max words - at ../dataset/shuffle-word-3300-count.jsonl\n"
+      "\r",
+      "h                    22%[===>                ] 331.24M  16.5MB/s    eta 76s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3400 max words - at ../dataset/shuffle-word-3400-count.jsonl\n"
+      "\r",
+      "                     22%[===>                ] 335.06M  16.5MB/s    eta 76s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 532 samples (20 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n"
+      "\r",
+      "                  v  23%[===>                ] 338.79M  16.5MB/s    eta 76s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7400 max words - at ../dataset/shuffle-word-7400-count.jsonl\n"
+      "\r",
+      "                 v5  23%[===>                ] 342.60M  16.5MB/s    eta 74s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6200 max words - at ../dataset/shuffle-word-6200-count.jsonl\n"
+      "\r",
+      "                v5r  23%[===>                ] 346.38M  16.5MB/s    eta 74s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7200 max words - at ../dataset/shuffle-word-7200-count.jsonl\n"
+      "\r",
+      "               v5r3  23%[===>                ] 350.20M  16.5MB/s    eta 74s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5200 max words - at ../dataset/shuffle-word-5200-count.jsonl\n"
+      "\r",
+      "              v5r3-  24%[===>                ] 354.12M  16.5MB/s    eta 74s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6000 max words - at ../dataset/shuffle-word-6000-count.jsonl\n"
+      "\r",
+      "             v5r3-L  24%[===>                ] 357.88M  16.5MB/s    eta 74s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4800 max words - at ../dataset/shuffle-word-4800-count.jsonl\n"
+      "\r",
+      "            v5r3-L6  24%[===>                ] 361.63M  16.5MB/s    eta 73s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4300 max words - at ../dataset/shuffle-word-4300-count.jsonl\n"
+      "\r",
+      "           v5r3-L6-  24%[===>                ] 365.38M  16.5MB/s    eta 73s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4600 max words - at ../dataset/shuffle-word-4600-count.jsonl\n"
+      "\r",
+      "          v5r3-L6-D  25%[====>               ] 369.10M  16.5MB/s    eta 73s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6100 max words - at ../dataset/shuffle-word-6100-count.jsonl\n"
+      "\r",
+      "         v5r3-L6-D2  25%[====>               ] 372.92M  16.5MB/s    eta 73s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5100 max words - at ../dataset/shuffle-word-5100-count.jsonl\n"
+      "\r",
+      "        v5r3-L6-D25  25%[====>               ] 376.79M  16.5MB/s    eta 73s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7100 max words - at ../dataset/shuffle-word-7100-count.jsonl\n"
+      "\r",
+      "       v5r3-L6-D256  25%[====>               ] 380.63M  16.5MB/s    eta 71s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5900 max words - at ../dataset/shuffle-word-5900-count.jsonl\n"
+      "\r",
+      "      v5r3-L6-D2560  26%[====>               ] 384.38M  16.5MB/s    eta 71s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1900 max words - at ../dataset/shuffle-word-1900-count.jsonl\n"
+      "\r",
+      "     v5r3-L6-D2560-  26%[====>               ] 387.63M  16.4MB/s    eta 71s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 2400 max words - at ../dataset/shuffle-word-2400-count.jsonl\n"
+      "\r",
+      "    v5r3-L6-D2560-E  26%[====>               ] 388.07M  15.7MB/s    eta 71s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3100 max words - at ../dataset/shuffle-word-3100-count.jsonl\n"
+      "\r",
+      "   v5r3-L6-D2560-E0  26%[====>               ] 391.90M  15.7MB/s    eta 71s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4700 max words - at ../dataset/shuffle-word-4700-count.jsonl\n"
+      "\r",
+      "  v5r3-L6-D2560-E0_  26%[====>               ] 394.49M  15.5MB/s    eta 71s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4400 max words - at ../dataset/shuffle-word-4400-count.jsonl\n"
+      "\r",
+      " v5r3-L6-D2560-E0_1  27%[====>               ] 397.45M  15.3MB/s    eta 71s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5600 max words - at ../dataset/shuffle-word-5600-count.jsonl\n"
+      "\r",
+      "v5r3-L6-D2560-E0_1-  27%[====>               ] 400.42M  15.1MB/s    eta 71s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5500 max words - at ../dataset/shuffle-word-5500-count.jsonl\n"
+      "\r",
+      "5r3-L6-D2560-E0_1-m  27%[====>               ] 403.48M  15.0MB/s    eta 71s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3200 max words - at ../dataset/shuffle-word-3200-count.jsonl\n"
+      "\r",
+      "r3-L6-D2560-E0_1-me  27%[====>               ] 406.03M  14.7MB/s    eta 71s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 25 samples (20 token repeat) - 2700 max words - at ../dataset/shuffle-word-2700-count.jsonl\n"
+      "\r",
+      "3-L6-D2560-E0_1-mem  27%[====>               ] 409.13M  14.6MB/s    eta 70s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 26 samples (20 token repeat) - 2600 max words - at ../dataset/shuffle-word-2600-count.jsonl\n"
+      "\r",
+      "-L6-D2560-E0_1-mem-  28%[====>               ] 412.31M  14.4MB/s    eta 70s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7300 max words - at ../dataset/shuffle-word-7300-count.jsonl\n"
+      "\r",
+      "L6-D2560-E0_1-mem-c  28%[====>               ] 415.51M  14.3MB/s    eta 70s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 2900 max words - at ../dataset/shuffle-word-2900-count.jsonl\n"
+      "\r",
+      "6-D2560-E0_1-mem-ct  28%[====>               ] 418.73M  14.2MB/s    eta 70s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 200 max words, 2000 samples - at ../dataset/gen-word-200-count.jsonl\n"
+      "\r",
+      "-D2560-E0_1-mem-ctx  28%[====>               ] 421.73M  14.0MB/s    eta 70s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 100 max words, 2000 samples - at ../dataset/gen-word-100-count.jsonl\n"
+      "\r",
+      "D2560-E0_1-mem-ctx-  28%[====>               ] 424.85M  13.8MB/s    eta 70s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 300 max words, 2000 samples - at ../dataset/gen-word-300-count.jsonl\n"
+      "\r",
+      "2560-E0_1-mem-ctx-5  29%[====>               ] 428.06M  13.7MB/s    eta 70s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 800 max words, 2000 samples - at ../dataset/gen-word-800-count.jsonl\n"
+      "\r",
+      "560-E0_1-mem-ctx-51  29%[====>               ] 431.29M  13.6MB/s    eta 70s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 400 max words, 2000 samples - at ../dataset/gen-word-400-count.jsonl\n"
+      "\r",
+      "60-E0_1-mem-ctx-512  29%[====>               ] 434.60M  13.5MB/s    eta 70s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 500 max words, 2000 samples - at ../dataset/gen-word-500-count.jsonl\n"
+      "\r",
+      "0-E0_1-mem-ctx-512.  29%[====>               ] 437.81M  13.4MB/s    eta 70s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 600 max words, 2000 samples - at ../dataset/gen-word-600-count.jsonl\n"
+      "\r",
+      "-E0_1-mem-ctx-512.p  30%[=====>              ] 441.10M  13.3MB/s    eta 69s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 700 max words, 2000 samples - at ../dataset/gen-word-700-count.jsonl\n"
+      "\r",
+      "E0_1-mem-ctx-512.pt  30%[=====>              ] 444.49M  13.2MB/s    eta 69s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1800 max words, 2000 samples - at ../dataset/gen-word-1800-count.jsonl\n"
+      "\r",
+      "0_1-mem-ctx-512.pth  30%[=====>              ] 447.85M  13.2MB/s    eta 69s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1100 max words, 2000 samples - at ../dataset/gen-word-1100-count.jsonl\n"
+      "\r",
+      "_1-mem-ctx-512.pth   30%[=====>              ] 451.23M  13.8MB/s    eta 69s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1000 max words, 2000 samples - at ../dataset/gen-word-1000-count.jsonl\n"
+      "\r",
+      "1-mem-ctx-512.pth    31%[=====>              ] 454.65M  13.8MB/s    eta 69s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 900 max words, 2000 samples - at ../dataset/gen-word-900-count.jsonl\n"
+      "\r",
+      "-mem-ctx-512.pth     31%[=====>              ] 458.06M  13.9MB/s    eta 68s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1900 max words, 2000 samples - at ../dataset/gen-word-1900-count.jsonl\n"
+      "\r",
+      "mem-ctx-512.pth      31%[=====>              ] 461.48M  14.0MB/s    eta 68s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1200 max words, 2000 samples - at ../dataset/gen-word-1200-count.jsonl\n"
+      "\r",
+      "em-ctx-512.pth       31%[=====>              ] 464.90M  14.2MB/s    eta 68s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1600 max words, 2000 samples - at ../dataset/gen-word-1600-count.jsonl\n"
+      "\r",
+      "m-ctx-512.pth        31%[=====>              ] 468.31M  14.2MB/s    eta 68s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1300 max words, 2000 samples - at ../dataset/gen-word-1300-count.jsonl\n"
+      "\r",
+      "-ctx-512.pth         32%[=====>              ] 471.76M  14.4MB/s    eta 68s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2400 max words, 2000 samples - at ../dataset/gen-word-2400-count.jsonl\n"
+      "\r",
+      "ctx-512.pth          32%[=====>              ] 475.26M  14.4MB/s    eta 66s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1500 max words, 2000 samples - at ../dataset/gen-word-1500-count.jsonl\n"
+      "\r",
+      "tx-512.pth           32%[=====>              ] 477.73M  14.3MB/s    eta 66s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1700 max words, 2000 samples - at ../dataset/gen-word-1700-count.jsonl\n"
+      "\r",
+      "x-512.pth            32%[=====>              ] 481.17M  14.3MB/s    eta 66s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2000 max words, 2000 samples - at ../dataset/gen-word-2000-count.jsonl\n"
+      "\r",
+      "-512.pth             33%[=====>              ] 484.62M  14.4MB/s    eta 66s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3300 max words, 2000 samples - at ../dataset/gen-word-3300-count.jsonl\n"
+      "\r",
+      "512.pth              33%[=====>              ] 488.09M  14.5MB/s    eta 66s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1400 max words, 2000 samples - at ../dataset/gen-word-1400-count.jsonl\n"
+      "\r",
+      "12.pth               33%[=====>              ] 491.59M  14.6MB/s    eta 65s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2200 max words, 2000 samples - at ../dataset/gen-word-2200-count.jsonl\n"
+      "\r",
+      "2.pth                33%[=====>              ] 495.10M  14.7MB/s    eta 65s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3000 max words, 2000 samples - at ../dataset/gen-word-3000-count.jsonl\n"
+      "\r",
+      ".pth                 33%[=====>              ] 498.07M  14.6MB/s    eta 65s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4900 max words, 2000 samples - at ../dataset/gen-word-4900-count.jsonl\n"
+      "\r",
+      "pth                  34%[=====>              ] 501.29M  14.6MB/s    eta 65s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3700 max words, 2000 samples - at ../dataset/gen-word-3700-count.jsonl\n"
+      "\r",
+      "th                   34%[=====>              ] 504.70M  14.6MB/s    eta 65s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2100 max words, 2000 samples - at ../dataset/gen-word-2100-count.jsonl\n"
+      "\r",
+      "h                    34%[=====>              ] 508.23M  14.6MB/s    eta 64s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2300 max words, 2000 samples - at ../dataset/gen-word-2300-count.jsonl\n"
+      "\r",
+      "                     34%[=====>              ] 511.73M  14.7MB/s    eta 64s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4200 max words, 2000 samples - at ../dataset/gen-word-4200-count.jsonl\n"
+      "\r",
+      "                  v  35%[======>             ] 515.20M  14.7MB/s    eta 64s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2600 max words, 2000 samples - at ../dataset/gen-word-2600-count.jsonl\n"
+      "\r",
+      "                 v5  35%[======>             ] 518.76M  14.8MB/s    eta 64s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2800 max words, 2000 samples - at ../dataset/gen-word-2800-count.jsonl\n"
+      "\r",
+      "                v5r  35%[======>             ] 521.78M  14.7MB/s    eta 64s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3400 max words, 2000 samples - at ../dataset/gen-word-3400-count.jsonl\n"
+      "\r",
+      "               v5r3  35%[======>             ] 525.34M  14.7MB/s    eta 63s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2500 max words, 2000 samples - at ../dataset/gen-word-2500-count.jsonl\n"
+      "\r",
+      "              v5r3-  36%[======>             ] 528.79M  14.7MB/s    eta 63s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2700 max words, 2000 samples - at ../dataset/gen-word-2700-count.jsonl\n"
+      "\r",
+      "             v5r3-L  36%[======>             ] 532.29M  14.7MB/s    eta 63s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4300 max words, 2000 samples - at ../dataset/gen-word-4300-count.jsonl\n"
+      "\r",
+      "            v5r3-L6  36%[======>             ] 535.76M  14.7MB/s    eta 63s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5900 max words, 2000 samples - at ../dataset/gen-word-5900-count.jsonl\n"
+      "\r",
+      "           v5r3-L6-  36%[======>             ] 539.28M  14.7MB/s    eta 63s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4600 max words, 2000 samples - at ../dataset/gen-word-4600-count.jsonl\n"
+      "\r",
+      "          v5r3-L6-D  36%[======>             ] 541.87M  14.6MB/s    eta 62s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3100 max words, 2000 samples - at ../dataset/gen-word-3100-count.jsonl\n"
+      "\r",
+      "         v5r3-L6-D2  37%[======>             ] 545.32M  14.8MB/s    eta 62s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2900 max words, 2000 samples - at ../dataset/gen-word-2900-count.jsonl\n"
+      "\r",
+      "        v5r3-L6-D25  37%[======>             ] 548.84M  14.8MB/s    eta 62s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3500 max words, 2000 samples - at ../dataset/gen-word-3500-count.jsonl\n"
+      "\r",
+      "       v5r3-L6-D256  37%[======>             ] 552.34M  14.9MB/s    eta 62s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5300 max words, 2000 samples - at ../dataset/gen-word-5300-count.jsonl\n"
+      "\r",
+      "      v5r3-L6-D2560  37%[======>             ] 555.78M  14.8MB/s    eta 62s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3800 max words, 2000 samples - at ../dataset/gen-word-3800-count.jsonl\n"
+      "\r",
+      "     v5r3-L6-D2560-  38%[======>             ] 559.32M  14.9MB/s    eta 61s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5400 max words, 2000 samples - at ../dataset/gen-word-5400-count.jsonl\n"
+      "\r",
+      "    v5r3-L6-D2560-E  38%[======>             ] 561.92M  14.6MB/s    eta 61s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3200 max words, 2000 samples - at ../dataset/gen-word-3200-count.jsonl\n"
+      "\r",
+      "   v5r3-L6-D2560-E0  38%[======>             ] 565.42M  14.8MB/s    eta 61s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3600 max words, 2000 samples - at ../dataset/gen-word-3600-count.jsonl\n"
+      "\r",
+      "  v5r3-L6-D2560-E0_  38%[======>             ] 568.92M  14.9MB/s    eta 61s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4000 max words, 2000 samples - at ../dataset/gen-word-4000-count.jsonl\n"
+      "\r",
+      " v5r3-L6-D2560-E0_1  39%[======>             ] 572.49M  14.9MB/s    eta 61s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5800 max words, 2000 samples - at ../dataset/gen-word-5800-count.jsonl\n"
+      "\r",
+      "v5r3-L6-D2560-E0_1-  39%[======>             ] 575.17M  14.7MB/s    eta 60s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4400 max words, 2000 samples - at ../dataset/gen-word-4400-count.jsonl\n"
+      "\r",
+      "5r3-L6-D2560-E0_1-m  39%[======>             ] 578.63M  14.7MB/s    eta 60s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6600 max words, 2000 samples - at ../dataset/gen-word-6600-count.jsonl\n"
+      "\r",
+      "r3-L6-D2560-E0_1-me  39%[======>             ] 582.20M  14.7MB/s    eta 60s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5000 max words, 2000 samples - at ../dataset/gen-word-5000-count.jsonl\n"
+      "\r",
+      "3-L6-D2560-E0_1-mem  39%[======>             ] 585.71M  14.7MB/s    eta 60s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4100 max words, 2000 samples - at ../dataset/gen-word-4100-count.jsonl\n"
+      "\r",
+      "-L6-D2560-E0_1-mem-  40%[=======>            ] 589.18M  14.8MB/s    eta 60s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3900 max words, 2000 samples - at ../dataset/gen-word-3900-count.jsonl\n"
+      "\r",
+      "L6-D2560-E0_1-mem-c  40%[=======>            ] 592.70M  14.8MB/s    eta 59s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4800 max words, 2000 samples - at ../dataset/gen-word-4800-count.jsonl\n"
+      "\r",
+      "6-D2560-E0_1-mem-ct  40%[=======>            ] 596.18M  14.8MB/s    eta 59s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6000 max words, 2000 samples - at ../dataset/gen-word-6000-count.jsonl\n"
+      "\r",
+      "-D2560-E0_1-mem-ctx  40%[=======>            ] 599.78M  14.8MB/s    eta 59s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5600 max words, 2000 samples - at ../dataset/gen-word-5600-count.jsonl\n"
+      "\r",
+      "D2560-E0_1-mem-ctx-  41%[=======>            ] 602.70M  14.6MB/s    eta 59s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5100 max words, 2000 samples - at ../dataset/gen-word-5100-count.jsonl\n"
+      "\r",
+      "2560-E0_1-mem-ctx-5  41%[=======>            ] 606.18M  14.6MB/s    eta 59s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4700 max words, 2000 samples - at ../dataset/gen-word-4700-count.jsonl\n"
+      "\r",
+      "560-E0_1-mem-ctx-51  41%[=======>            ] 609.79M  14.8MB/s    eta 58s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5200 max words, 2000 samples - at ../dataset/gen-word-5200-count.jsonl\n"
+      "\r",
+      "60-E0_1-mem-ctx-512  41%[=======>            ] 613.31M  14.8MB/s    eta 58s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4500 max words, 2000 samples - at ../dataset/gen-word-4500-count.jsonl\n"
+      "\r",
+      "0-E0_1-mem-ctx-512.  42%[=======>            ] 615.92M  14.6MB/s    eta 58s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5500 max words, 2000 samples - at ../dataset/gen-word-5500-count.jsonl\n"
+      "\r",
+      "-E0_1-mem-ctx-512.p  42%[=======>            ] 619.49M  14.6MB/s    eta 58s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6500 max words, 2000 samples - at ../dataset/gen-word-6500-count.jsonl\n"
+      "\r",
+      "E0_1-mem-ctx-512.pt  42%[=======>            ] 623.07M  14.7MB/s    eta 58s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6300 max words, 2000 samples - at ../dataset/gen-word-6300-count.jsonl\n"
+      "\r",
+      "0_1-mem-ctx-512.pth  42%[=======>            ] 626.73M  14.7MB/s    eta 56s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7000 max words, 2000 samples - at ../dataset/gen-word-7000-count.jsonl\n"
+      "\r",
+      "_1-mem-ctx-512.pth   42%[=======>            ] 630.40M  14.9MB/s    eta 56s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5700 max words, 2000 samples - at ../dataset/gen-word-5700-count.jsonl\n"
+      "\r",
+      "1-mem-ctx-512.pth    43%[=======>            ] 634.06M  15.0MB/s    eta 56s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6700 max words, 2000 samples - at ../dataset/gen-word-6700-count.jsonl\n"
+      "\r",
+      "-mem-ctx-512.pth     43%[=======>            ] 637.68M  15.0MB/s    eta 56s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7800 max words, 2000 samples - at ../dataset/gen-word-7800-count.jsonl\n"
+      "\r",
+      "mem-ctx-512.pth      43%[=======>            ] 641.40M  15.0MB/s    eta 56s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6200 max words, 2000 samples - at ../dataset/gen-word-6200-count.jsonl\n"
+      "\r",
+      "em-ctx-512.pth       43%[=======>            ] 645.04M  15.3MB/s    eta 55s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7500 max words, 2000 samples - at ../dataset/gen-word-7500-count.jsonl\n"
+      "\r",
+      "m-ctx-512.pth        44%[=======>            ] 648.71M  15.3MB/s    eta 55s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6400 max words, 2000 samples - at ../dataset/gen-word-6400-count.jsonl\n"
+      "\r",
+      "-ctx-512.pth         44%[=======>            ] 652.49M  15.4MB/s    eta 55s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7100 max words, 2000 samples - at ../dataset/gen-word-7100-count.jsonl\n"
+      "\r",
+      "ctx-512.pth          44%[=======>            ] 656.26M  15.4MB/s    eta 55s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7900 max words, 2000 samples - at ../dataset/gen-word-7900-count.jsonl\n"
+      "\r",
+      "tx-512.pth           45%[========>           ] 660.07M  15.3MB/s    eta 55s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7600 max words, 2000 samples - at ../dataset/gen-word-7600-count.jsonl\n"
+      "\r",
+      "x-512.pth            45%[========>           ] 663.84M  15.4MB/s    eta 54s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6900 max words, 2000 samples - at ../dataset/gen-word-6900-count.jsonl\n"
+      "\r",
+      "-512.pth             45%[========>           ] 667.57M  15.4MB/s    eta 54s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7300 max words, 2000 samples - at ../dataset/gen-word-7300-count.jsonl\n"
+      "\r",
+      "512.pth              45%[========>           ] 671.35M  15.4MB/s    eta 54s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6100 max words, 2000 samples - at ../dataset/gen-word-6100-count.jsonl\n"
+      "\r",
+      "12.pth               46%[========>           ] 675.13M  15.7MB/s    eta 54s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7400 max words, 2000 samples - at ../dataset/gen-word-7400-count.jsonl\n"
+      "\r",
+      "2.pth                46%[========>           ] 678.79M  15.8MB/s    eta 54s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6800 max words, 2000 samples - at ../dataset/gen-word-6800-count.jsonl\n"
+      "\r",
+      ".pth                 46%[========>           ] 682.62M  15.8MB/s    eta 52s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7200 max words, 2000 samples - at ../dataset/gen-word-7200-count.jsonl\n"
+      "\r",
+      "pth                  46%[========>           ] 686.34M  15.8MB/s    eta 52s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7700 max words, 2000 samples - at ../dataset/gen-word-7700-count.jsonl\n"
+      "\r",
+      "th                   47%[========>           ] 690.13M  16.1MB/s    eta 52s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 8000 max words, 2000 samples - at ../dataset/gen-word-8000-count.jsonl\n"
+      "\r",
+      "h                    47%[========>           ] 693.85M  16.1MB/s    eta 52s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Done ##\n"
+      "\r",
+      "                     47%[========>           ] 697.71M  16.2MB/s    eta 52s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "total 2.5G\n"
+      "\r",
+      "                  v  47%[========>           ] 701.46M  16.2MB/s    eta 51s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  20K Sep 13 21:43 gen-word-10-count.jsonl\n"
+      "\r",
+      "                 v5  48%[========>           ] 705.26M  16.2MB/s    eta 51s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 2.1M Sep 13 21:43 gen-word-100-count.jsonl\n"
+      "\r",
+      "                v5r  48%[========>           ] 709.01M  16.2MB/s    eta 51s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  20M Sep 13 21:43 gen-word-1000-count.jsonl\n"
+      "\r",
+      "               v5r3  48%[========>           ] 712.85M  16.2MB/s    eta 51s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  22M Sep 13 21:43 gen-word-1100-count.jsonl\n"
+      "\r",
+      "              v5r3-  48%[========>           ] 716.67M  16.2MB/s    eta 51s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  23M Sep 13 21:43 gen-word-1200-count.jsonl\n"
+      "\r",
+      "             v5r3-L  49%[========>           ] 719.21M  16.0MB/s    eta 50s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  25M Sep 13 21:43 gen-word-1300-count.jsonl\n"
+      "\r",
+      "            v5r3-L6  49%[========>           ] 722.99M  16.0MB/s    eta 50s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27M Sep 13 21:43 gen-word-1400-count.jsonl\n"
+      "\r",
+      "           v5r3-L6-  49%[========>           ] 726.73M  16.0MB/s    eta 50s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  25K Sep 13 21:43 gen-word-15-count.jsonl\n"
+      "\r",
+      "          v5r3-L6-D  49%[========>           ] 730.45M  16.0MB/s    eta 50s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  29M Sep 13 21:43 gen-word-1500-count.jsonl\n"
+      "\r",
+      "         v5r3-L6-D2  50%[=========>          ] 734.38M  16.2MB/s    eta 50s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  31M Sep 13 21:43 gen-word-1600-count.jsonl\n"
+      "\r",
+      "        v5r3-L6-D25  50%[=========>          ] 738.12M  16.2MB/s    eta 48s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  33M Sep 13 21:43 gen-word-1700-count.jsonl\n"
+      "\r",
+      "       v5r3-L6-D256  50%[=========>          ] 741.13M  16.0MB/s    eta 48s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  35M Sep 13 21:43 gen-word-1800-count.jsonl\n"
+      "\r",
+      "      v5r3-L6-D2560  50%[=========>          ] 741.59M  15.3MB/s    eta 48s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  37M Sep 13 21:43 gen-word-1900-count.jsonl\n"
+      "\r",
+      "     v5r3-L6-D2560-  50%[=========>          ] 745.43M  15.3MB/s    eta 48s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  30K Sep 13 21:43 gen-word-20-count.jsonl\n"
+      "\r",
+      "    v5r3-L6-D2560-E  51%[=========>          ] 747.93M  15.0MB/s    eta 48s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 4.0M Sep 13 21:43 gen-word-200-count.jsonl\n"
+      "\r",
+      "   v5r3-L6-D2560-E0  51%[=========>          ] 750.78M  14.8MB/s    eta 48s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  39M Sep 13 21:43 gen-word-2000-count.jsonl\n"
+      "\r",
+      "  v5r3-L6-D2560-E0_  51%[=========>          ] 752.87M  14.4MB/s    eta 48s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  41M Sep 13 21:43 gen-word-2100-count.jsonl\n"
+      "\r",
+      " v5r3-L6-D2560-E0_1  51%[=========>          ] 755.03M  14.1MB/s    eta 48s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  42M Sep 13 21:43 gen-word-2200-count.jsonl\n"
+      "\r",
+      "v5r3-L6-D2560-E0_1-  51%[=========>          ] 757.21M  13.8MB/s    eta 48s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  44M Sep 13 21:43 gen-word-2300-count.jsonl\n"
+      "\r",
+      "5r3-L6-D2560-E0_1-m  51%[=========>          ] 759.43M  13.4MB/s    eta 48s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  46M Sep 13 21:43 gen-word-2400-count.jsonl\n"
+      "\r",
+      "r3-L6-D2560-E0_1-me  51%[=========>          ] 761.67M  13.1MB/s    eta 48s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  34K Sep 13 21:43 gen-word-25-count.jsonl\n"
+      "\r",
+      "3-L6-D2560-E0_1-mem  52%[=========>          ] 763.93M  12.8MB/s    eta 48s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  48M Sep 13 21:43 gen-word-2500-count.jsonl\n"
+      "\r",
+      "-L6-D2560-E0_1-mem-  52%[=========>          ] 766.23M  12.5MB/s    eta 48s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  50M Sep 13 21:43 gen-word-2600-count.jsonl\n"
+      "\r",
+      "L6-D2560-E0_1-mem-c  52%[=========>          ] 768.54M  12.1MB/s    eta 48s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  52M Sep 13 21:43 gen-word-2700-count.jsonl\n"
+      "\r",
+      "6-D2560-E0_1-mem-ct  52%[=========>          ] 770.87M  11.8MB/s    eta 48s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  54M Sep 13 21:43 gen-word-2800-count.jsonl\n"
+      "\r",
+      "-D2560-E0_1-mem-ctx  52%[=========>          ] 773.23M  11.8MB/s    eta 47s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  56M Sep 13 21:43 gen-word-2900-count.jsonl\n"
+      "\r",
+      "D2560-E0_1-mem-ctx-  52%[=========>          ] 775.60M  11.5MB/s    eta 47s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  41K Sep 13 21:43 gen-word-30-count.jsonl\n"
+      "\r",
+      "2560-E0_1-mem-ctx-5  53%[=========>          ] 777.99M  11.2MB/s    eta 47s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 5.9M Sep 13 21:43 gen-word-300-count.jsonl\n"
+      "\r",
+      "560-E0_1-mem-ctx-51  53%[=========>          ] 780.40M  10.9MB/s    eta 47s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  58M Sep 13 21:43 gen-word-3000-count.jsonl\n"
+      "\r",
+      "60-E0_1-mem-ctx-512  53%[=========>          ] 782.82M  10.6MB/s    eta 47s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  60M Sep 13 21:43 gen-word-3100-count.jsonl\n"
+      "\r",
+      "0-E0_1-mem-ctx-512.  53%[=========>          ] 785.24M  10.3MB/s    eta 46s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  62M Sep 13 21:43 gen-word-3200-count.jsonl\n"
+      "\r",
+      "-E0_1-mem-ctx-512.p  53%[=========>          ] 787.70M  10.2MB/s    eta 46s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  63M Sep 13 21:43 gen-word-3300-count.jsonl\n"
+      "\r",
+      "E0_1-mem-ctx-512.pt  53%[=========>          ] 790.15M  10.6MB/s    eta 46s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  65M Sep 13 21:43 gen-word-3400-count.jsonl\n"
+      "\r",
+      "0_1-mem-ctx-512.pth  54%[=========>          ] 792.63M  10.3MB/s    eta 46s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  46K Sep 13 21:43 gen-word-35-count.jsonl\n"
+      "\r",
+      "_1-mem-ctx-512.pth   54%[=========>          ] 795.10M  10.4MB/s    eta 46s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  67M Sep 13 21:43 gen-word-3500-count.jsonl\n"
+      "\r",
+      "1-mem-ctx-512.pth    54%[=========>          ] 797.60M  10.3MB/s    eta 46s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  69M Sep 13 21:43 gen-word-3600-count.jsonl\n"
+      "\r",
+      "-mem-ctx-512.pth     54%[=========>          ] 800.10M  10.4MB/s    eta 46s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  71M Sep 13 21:43 gen-word-3700-count.jsonl\n"
+      "\r",
+      "mem-ctx-512.pth      54%[=========>          ] 802.60M  10.5MB/s    eta 46s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  73M Sep 13 21:43 gen-word-3800-count.jsonl\n"
+      "\r",
+      "em-ctx-512.pth       54%[=========>          ] 805.12M  10.6MB/s    eta 46s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  75M Sep 13 21:43 gen-word-3900-count.jsonl\n"
+      "\r",
+      "m-ctx-512.pth        55%[==========>         ] 807.63M  10.6MB/s    eta 46s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  50K Sep 13 21:43 gen-word-40-count.jsonl\n"
+      "\r",
+      "-ctx-512.pth         55%[==========>         ] 810.17M  10.7MB/s    eta 45s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 7.9M Sep 13 21:43 gen-word-400-count.jsonl\n"
+      "\r",
+      "ctx-512.pth          55%[==========>         ] 812.70M  10.8MB/s    eta 45s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  77M Sep 13 21:43 gen-word-4000-count.jsonl\n"
+      "\r",
+      "tx-512.pth           55%[==========>         ] 815.23M  10.8MB/s    eta 45s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  78M Sep 13 21:43 gen-word-4100-count.jsonl\n"
+      "\r",
+      "x-512.pth            55%[==========>         ] 817.76M  10.9MB/s    eta 45s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  80M Sep 13 21:43 gen-word-4200-count.jsonl\n"
+      "\r",
+      "-512.pth             55%[==========>         ] 820.31M  10.9MB/s    eta 45s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  82M Sep 13 21:43 gen-word-4300-count.jsonl\n"
+      "\r",
+      "512.pth              56%[==========>         ] 822.85M  11.0MB/s    eta 45s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  84M Sep 13 21:43 gen-word-4400-count.jsonl\n"
+      "\r",
+      "12.pth               56%[==========>         ] 825.40M  11.0MB/s    eta 45s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  55K Sep 13 21:43 gen-word-45-count.jsonl\n"
+      "\r",
+      "2.pth                56%[==========>         ] 827.95M  11.0MB/s    eta 45s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  86M Sep 13 21:43 gen-word-4500-count.jsonl\n"
+      "\r",
+      ".pth                 56%[==========>         ] 830.48M  11.1MB/s    eta 45s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  88M Sep 13 21:43 gen-word-4600-count.jsonl\n"
+      "\r",
+      "pth                  56%[==========>         ] 833.04M  11.1MB/s    eta 45s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  90M Sep 13 21:43 gen-word-4700-count.jsonl\n"
+      "\r",
+      "th                   56%[==========>         ] 835.59M  11.1MB/s    eta 44s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  92M Sep 13 21:43 gen-word-4800-count.jsonl\n"
+      "\r",
+      "h                    57%[==========>         ] 838.15M  11.2MB/s    eta 44s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  94M Sep 13 21:43 gen-word-4900-count.jsonl\n"
+      "\r",
+      "                     57%[==========>         ] 840.71M  11.2MB/s    eta 44s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  15K Sep 13 21:43 gen-word-5-count.jsonl\n"
+      "\r",
+      "                  v  57%[==========>         ] 843.26M  11.2MB/s    eta 44s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  58K Sep 13 21:43 gen-word-50-count.jsonl\n"
+      "\r",
+      "                 v5  57%[==========>         ] 845.81M  11.2MB/s    eta 44s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 9.7M Sep 13 21:43 gen-word-500-count.jsonl\n"
+      "\r",
+      "                v5r  57%[==========>         ] 848.37M  11.2MB/s    eta 43s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  96M Sep 13 21:43 gen-word-5000-count.jsonl\n"
+      "\r",
+      "               v5r3  58%[==========>         ] 850.92M  11.2MB/s    eta 43s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  98M Sep 13 21:43 gen-word-5100-count.jsonl\n"
+      "\r",
+      "              v5r3-  58%[==========>         ] 853.46M  11.2MB/s    eta 43s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  99M Sep 13 21:43 gen-word-5200-count.jsonl\n"
+      "\r",
+      "             v5r3-L  58%[==========>         ] 856.03M  11.3MB/s    eta 43s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 101M Sep 13 21:43 gen-word-5300-count.jsonl\n"
+      "\r",
+      "            v5r3-L6  58%[==========>         ] 858.57M  11.3MB/s    eta 43s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 103M Sep 13 21:43 gen-word-5400-count.jsonl\n"
+      "\r",
+      "           v5r3-L6-  58%[==========>         ] 861.12M  11.3MB/s    eta 42s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  62K Sep 13 21:43 gen-word-55-count.jsonl\n"
+      "\r",
+      "          v5r3-L6-D  58%[==========>         ] 863.68M  11.3MB/s    eta 42s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 105M Sep 13 21:43 gen-word-5500-count.jsonl\n"
+      "\r",
+      "         v5r3-L6-D2  59%[==========>         ] 866.23M  11.3MB/s    eta 42s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 107M Sep 13 21:43 gen-word-5600-count.jsonl\n"
+      "\r",
+      "        v5r3-L6-D25  59%[==========>         ] 868.79M  11.3MB/s    eta 42s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 109M Sep 13 21:43 gen-word-5700-count.jsonl\n"
+      "\r",
+      "       v5r3-L6-D256  59%[==========>         ] 871.34M  11.3MB/s    eta 42s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 111M Sep 13 21:43 gen-word-5800-count.jsonl\n"
+      "\r",
+      "      v5r3-L6-D2560  59%[==========>         ] 873.90M  11.1MB/s    eta 42s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 113M Sep 13 21:43 gen-word-5900-count.jsonl\n"
+      "\r",
+      "     v5r3-L6-D2560-  59%[==========>         ] 876.43M  10.8MB/s    eta 42s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  65K Sep 13 21:43 gen-word-60-count.jsonl\n"
+      "\r",
+      "    v5r3-L6-D2560-E  59%[==========>         ] 878.99M  11.3MB/s    eta 42s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  12M Sep 13 21:43 gen-word-600-count.jsonl\n"
+      "\r",
+      "   v5r3-L6-D2560-E0  60%[===========>        ] 881.56M  11.1MB/s    eta 42s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 115M Sep 13 21:43 gen-word-6000-count.jsonl\n"
+      "\r",
+      "  v5r3-L6-D2560-E0_  60%[===========>        ] 883.37M  10.7MB/s    eta 42s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 117M Sep 13 21:43 gen-word-6100-count.jsonl\n"
+      "\r",
+      " v5r3-L6-D2560-E0_1  60%[===========>        ] 887.10M  10.9MB/s    eta 41s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 118M Sep 13 21:43 gen-word-6200-count.jsonl\n"
+      "\r",
+      "v5r3-L6-D2560-E0_1-  60%[===========>        ] 888.93M  10.6MB/s    eta 41s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 120M Sep 13 21:43 gen-word-6300-count.jsonl\n"
+      "\r",
+      "5r3-L6-D2560-E0_1-m  60%[===========>        ] 890.81M  10.9MB/s    eta 41s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 122M Sep 13 21:43 gen-word-6400-count.jsonl\n"
+      "\r",
+      "r3-L6-D2560-E0_1-me  60%[===========>        ] 892.73M  10.6MB/s    eta 41s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  72K Sep 13 21:43 gen-word-65-count.jsonl\n"
+      "\r",
+      "3-L6-D2560-E0_1-mem  61%[===========>        ] 894.70M  10.3MB/s    eta 41s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 124M Sep 13 21:43 gen-word-6500-count.jsonl\n"
+      "\r",
+      "-L6-D2560-E0_1-mem-  61%[===========>        ] 896.70M  10.2MB/s    eta 41s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 126M Sep 13 21:43 gen-word-6600-count.jsonl\n"
+      "\r",
+      "L6-D2560-E0_1-mem-c  61%[===========>        ] 898.54M  10.2MB/s    eta 41s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 128M Sep 13 21:43 gen-word-6700-count.jsonl\n"
+      "\r",
+      "6-D2560-E0_1-mem-ct  61%[===========>        ] 899.99M  9.85MB/s    eta 41s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 130M Sep 13 21:43 gen-word-6800-count.jsonl\n"
+      "\r",
+      "-D2560-E0_1-mem-ctx  61%[===========>        ] 902.09M  9.77MB/s    eta 41s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 132M Sep 13 21:43 gen-word-6900-count.jsonl\n"
+      "\r",
+      "D2560-E0_1-mem-ctx-  61%[===========>        ] 904.21M  9.95MB/s    eta 41s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  79K Sep 13 21:43 gen-word-70-count.jsonl\n"
+      "\r",
+      "2560-E0_1-mem-ctx-5  61%[===========>        ] 906.38M  9.73MB/s    eta 40s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  14M Sep 13 21:43 gen-word-700-count.jsonl\n"
+      "\r",
+      "560-E0_1-mem-ctx-51  61%[===========>        ] 908.57M  9.56MB/s    eta 40s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 134M Sep 13 21:43 gen-word-7000-count.jsonl\n"
+      "\r",
+      "60-E0_1-mem-ctx-512  62%[===========>        ] 910.79M  9.35MB/s    eta 40s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 136M Sep 13 21:43 gen-word-7100-count.jsonl\n"
+      "\r",
+      "0-E0_1-mem-ctx-512.  62%[===========>        ] 913.04M  9.35MB/s    eta 40s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 137M Sep 13 21:43 gen-word-7200-count.jsonl\n"
+      "\r",
+      "-E0_1-mem-ctx-512.p  62%[===========>        ] 915.31M  9.43MB/s    eta 40s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 139M Sep 13 21:43 gen-word-7300-count.jsonl\n"
+      "\r",
+      "E0_1-mem-ctx-512.pt  62%[===========>        ] 917.60M  9.34MB/s    eta 40s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 141M Sep 13 21:43 gen-word-7400-count.jsonl\n"
+      "\r",
+      "0_1-mem-ctx-512.pth  62%[===========>        ] 919.93M  9.62MB/s    eta 40s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  83K Sep 13 21:43 gen-word-75-count.jsonl\n"
+      "\r",
+      "_1-mem-ctx-512.pth   62%[===========>        ] 922.26M  9.32MB/s    eta 40s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 143M Sep 13 21:43 gen-word-7500-count.jsonl\n"
+      "\r",
+      "1-mem-ctx-512.pth    63%[===========>        ] 924.62M  9.54MB/s    eta 40s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 145M Sep 13 21:43 gen-word-7600-count.jsonl\n"
+      "\r",
+      "-mem-ctx-512.pth     63%[===========>        ] 927.01M  9.54MB/s    eta 40s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 147M Sep 13 21:43 gen-word-7700-count.jsonl\n"
+      "\r",
+      "mem-ctx-512.pth      63%[===========>        ] 929.40M  9.66MB/s    eta 39s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 149M Sep 13 21:43 gen-word-7800-count.jsonl\n"
+      "\r",
+      "em-ctx-512.pth       63%[===========>        ] 931.81M  9.83MB/s    eta 39s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 151M Sep 13 21:43 gen-word-7900-count.jsonl\n"
+      "\r",
+      "m-ctx-512.pth        63%[===========>        ] 934.23M  10.0MB/s    eta 39s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  87K Sep 13 21:43 gen-word-80-count.jsonl\n"
+      "\r",
+      "-ctx-512.pth         63%[===========>        ] 936.68M  9.99MB/s    eta 39s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  16M Sep 13 21:43 gen-word-800-count.jsonl\n"
+      "\r",
+      "ctx-512.pth          64%[===========>        ] 939.13M  10.1MB/s    eta 39s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 153M Sep 13 21:43 gen-word-8000-count.jsonl\n"
+      "\r",
+      "tx-512.pth           64%[===========>        ] 941.60M  10.1MB/s    eta 38s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  92K Sep 13 21:43 gen-word-85-count.jsonl\n"
+      "\r",
+      "x-512.pth            64%[===========>        ] 944.09M  10.3MB/s    eta 38s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  95K Sep 13 21:43 gen-word-90-count.jsonl\n"
+      "\r",
+      "-512.pth             64%[===========>        ] 946.57M  10.5MB/s    eta 38s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  18M Sep 13 21:43 gen-word-900-count.jsonl\n"
+      "\r",
+      "512.pth              64%[===========>        ] 949.07M  10.3MB/s    eta 38s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  99K Sep 13 21:43 gen-word-95-count.jsonl\n"
+      "\r",
+      "12.pth               64%[===========>        ] 951.57M  10.5MB/s    eta 38s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 519K Sep 13 21:43 shuffle-word-10-count.jsonl\n"
+      "\r",
+      "2.pth                65%[============>       ] 954.09M  10.7MB/s    eta 37s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 561K Sep 13 21:43 shuffle-word-100-count.jsonl\n"
+      "\r",
+      ".pth                 65%[============>       ] 956.54M  10.8MB/s    eta 37s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 524K Sep 13 21:43 shuffle-word-1000-count.jsonl\n"
+      "\r",
+      "pth                  65%[============>       ] 959.06M  10.6MB/s    eta 37s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 521K Sep 13 21:43 shuffle-word-1100-count.jsonl\n"
+      "\r",
+      "th                   65%[============>       ] 961.60M  10.6MB/s    eta 37s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 523K Sep 13 21:43 shuffle-word-1200-count.jsonl\n"
+      "\r",
+      "h                    65%[============>       ] 964.13M  10.8MB/s    eta 37s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 522K Sep 13 21:43 shuffle-word-1300-count.jsonl\n"
+      "\r",
+      "                     65%[============>       ] 966.70M  10.9MB/s    eta 36s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 521K Sep 13 21:43 shuffle-word-1400-count.jsonl\n"
+      "\r",
+      "                  v  66%[============>       ] 969.24M  10.8MB/s    eta 36s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 443K Sep 13 21:43 shuffle-word-15-count.jsonl\n"
+      "\r",
+      "                 v5  66%[============>       ] 971.79M  10.7MB/s    eta 36s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 520K Sep 13 21:43 shuffle-word-1500-count.jsonl\n"
+      "\r",
+      "                v5r  66%[============>       ] 974.35M  10.9MB/s    eta 36s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 520K Sep 13 21:43 shuffle-word-1600-count.jsonl\n"
+      "\r",
+      "               v5r3  66%[============>       ] 976.92M  11.1MB/s    eta 36s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 520K Sep 13 21:43 shuffle-word-1700-count.jsonl\n"
+      "\r",
+      "              v5r3-  66%[============>       ] 979.48M  11.1MB/s    eta 36s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 519K Sep 13 21:43 shuffle-word-1800-count.jsonl\n"
+      "\r",
+      "             v5r3-L  66%[============>       ] 982.04M  10.8MB/s    eta 36s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 520K Sep 13 21:43 shuffle-word-1900-count.jsonl\n"
+      "\r",
+      "            v5r3-L6  67%[============>       ] 984.60M  11.0MB/s    eta 36s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 379K Sep 13 21:43 shuffle-word-20-count.jsonl\n"
+      "\r",
+      "           v5r3-L6-  67%[============>       ] 987.18M  11.2MB/s    eta 36s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 545K Sep 13 21:43 shuffle-word-200-count.jsonl\n"
+      "\r",
+      "          v5r3-L6-D  67%[============>       ] 989.74M  11.1MB/s    eta 36s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 522K Sep 13 21:43 shuffle-word-2000-count.jsonl\n"
+      "\r",
+      "         v5r3-L6-D2  67%[============>       ] 992.31M  11.1MB/s    eta 35s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 522K Sep 13 21:43 shuffle-word-2100-count.jsonl\n"
+      "\r",
+      "        v5r3-L6-D25  67%[============>       ] 994.88M  11.2MB/s    eta 35s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 524K Sep 13 21:43 shuffle-word-2200-count.jsonl\n"
+      "\r",
+      "       v5r3-L6-D256  68%[============>       ] 997.45M  11.1MB/s    eta 35s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 519K Sep 13 21:43 shuffle-word-2300-count.jsonl\n"
+      "\r",
+      "      v5r3-L6-D2560  68%[============>       ]   1000M  11.1MB/s    eta 35s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 518K Sep 13 21:43 shuffle-word-2400-count.jsonl\n"
+      "\r",
+      "     v5r3-L6-D2560-  68%[============>       ]   1003M  11.3MB/s    eta 35s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 349K Sep 13 21:43 shuffle-word-25-count.jsonl\n"
+      "\r",
+      "    v5r3-L6-D2560-E  68%[============>       ]   1005M  11.2MB/s    eta 34s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 521K Sep 13 21:43 shuffle-word-2500-count.jsonl\n"
+      "\r",
+      "   v5r3-L6-D2560-E0  68%[============>       ]   1008M  11.3MB/s    eta 34s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 512K Sep 13 21:43 shuffle-word-2600-count.jsonl\n"
+      "\r",
+      "  v5r3-L6-D2560-E0_  68%[============>       ]   1010M  11.2MB/s    eta 34s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 513K Sep 13 21:43 shuffle-word-2700-count.jsonl\n"
+      "\r",
+      " v5r3-L6-D2560-E0_1  69%[============>       ]   1013M  11.2MB/s    eta 34s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-2800-count.jsonl\n"
+      "\r",
+      "v5r3-L6-D2560-E0_1-  69%[============>       ]   1015M  11.2MB/s    eta 34s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep 13 21:43 shuffle-word-2900-count.jsonl\n"
+      "\r",
+      "5r3-L6-D2560-E0_1-m  69%[============>       ]   1018M  11.2MB/s    eta 33s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 343K Sep 13 21:43 shuffle-word-30-count.jsonl\n"
+      "\r",
+      "r3-L6-D2560-E0_1-me  69%[============>       ]   1021M  11.0MB/s    eta 33s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 534K Sep 13 21:43 shuffle-word-300-count.jsonl\n"
+      "\r",
+      "3-L6-D2560-E0_1-mem  69%[============>       ]   1023M  11.2MB/s    eta 33s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-3000-count.jsonl\n"
+      "\r",
+      "-L6-D2560-E0_1-mem-  69%[============>       ]   1.00G  11.3MB/s    eta 33s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-3100-count.jsonl\n"
+      "\r",
+      "L6-D2560-E0_1-mem-c  70%[=============>      ]   1.00G  11.2MB/s    eta 33s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-3200-count.jsonl\n"
+      "\r",
+      "6-D2560-E0_1-mem-ct  70%[=============>      ]   1.01G  11.0MB/s    eta 32s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep 13 21:43 shuffle-word-3300-count.jsonl\n"
+      "\r",
+      "-D2560-E0_1-mem-ctx  70%[=============>      ]   1.01G  11.2MB/s    eta 32s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep 13 21:43 shuffle-word-3400-count.jsonl\n"
+      "\r",
+      "D2560-E0_1-mem-ctx-  70%[=============>      ]   1.01G  11.2MB/s    eta 32s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 335K Sep 13 21:43 shuffle-word-35-count.jsonl\n"
+      "\r",
+      "2560-E0_1-mem-ctx-5  70%[=============>      ]   1.01G  11.0MB/s    eta 32s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep 13 21:43 shuffle-word-3500-count.jsonl\n"
+      "\r",
+      "560-E0_1-mem-ctx-51  70%[=============>      ]   1.02G  11.2MB/s    eta 32s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep 13 21:43 shuffle-word-3600-count.jsonl\n"
+      "\r",
+      "60-E0_1-mem-ctx-512  71%[=============>      ]   1.02G  11.4MB/s    eta 31s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep 13 21:43 shuffle-word-3700-count.jsonl\n"
+      "\r",
+      "0-E0_1-mem-ctx-512.  71%[=============>      ]   1.02G  11.4MB/s    eta 31s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep 13 21:43 shuffle-word-3800-count.jsonl\n"
+      "\r",
+      "-E0_1-mem-ctx-512.p  71%[=============>      ]   1.02G  11.2MB/s    eta 31s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep 13 21:43 shuffle-word-3900-count.jsonl\n"
+      "\r",
+      "E0_1-mem-ctx-512.pt  71%[=============>      ]   1.03G  11.3MB/s    eta 31s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 323K Sep 13 21:43 shuffle-word-40-count.jsonl\n"
+      "\r",
+      "0_1-mem-ctx-512.pth  71%[=============>      ]   1.03G  11.4MB/s    eta 31s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 529K Sep 13 21:43 shuffle-word-400-count.jsonl\n"
+      "\r",
+      "_1-mem-ctx-512.pth   72%[=============>      ]   1.03G  11.4MB/s    eta 30s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-4000-count.jsonl\n"
+      "\r",
+      "1-mem-ctx-512.pth    72%[=============>      ]   1.03G  11.4MB/s    eta 30s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-4100-count.jsonl\n"
+      "\r",
+      "-mem-ctx-512.pth     72%[=============>      ]   1.04G  11.4MB/s    eta 30s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-4200-count.jsonl\n"
+      "\r",
+      "mem-ctx-512.pth      72%[=============>      ]   1.04G  11.3MB/s    eta 30s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-4300-count.jsonl\n"
+      "\r",
+      "em-ctx-512.pth       72%[=============>      ]   1.04G  11.1MB/s    eta 30s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep 13 21:43 shuffle-word-4400-count.jsonl\n"
+      "\r",
+      "m-ctx-512.pth        72%[=============>      ]   1.04G  11.5MB/s    eta 30s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 313K Sep 13 21:43 shuffle-word-45-count.jsonl\n"
+      "\r",
+      "-ctx-512.pth         73%[=============>      ]   1.05G  11.3MB/s    eta 30s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-4500-count.jsonl\n"
+      "\r",
+      "ctx-512.pth          73%[=============>      ]   1.05G  11.2MB/s    eta 30s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep 13 21:43 shuffle-word-4600-count.jsonl\n"
+      "\r",
+      "tx-512.pth           73%[=============>      ]   1.05G  11.4MB/s    eta 30s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-4700-count.jsonl\n"
+      "\r",
+      "x-512.pth            73%[=============>      ]   1.05G  11.4MB/s    eta 30s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep 13 21:43 shuffle-word-4800-count.jsonl\n"
+      "\r",
+      "-512.pth             73%[=============>      ]   1.06G  11.3MB/s    eta 29s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-4900-count.jsonl\n"
+      "\r",
+      "512.pth              74%[=============>      ]   1.06G  11.5MB/s    eta 29s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 822K Sep 13 21:43 shuffle-word-5-count.jsonl\n"
+      "\r",
+      "12.pth               74%[=============>      ]   1.06G  11.7MB/s    eta 29s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 308K Sep 13 21:43 shuffle-word-50-count.jsonl\n"
+      "\r",
+      "2.pth                74%[=============>      ]   1.07G  11.6MB/s    eta 29s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 528K Sep 13 21:43 shuffle-word-500-count.jsonl\n"
+      "\r",
+      ".pth                 74%[=============>      ]   1.07G  11.4MB/s    eta 29s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-5000-count.jsonl\n"
+      "\r",
+      "pth                  74%[=============>      ]   1.07G  11.6MB/s    eta 28s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-5100-count.jsonl\n"
+      "\r",
+      "th                   75%[==============>     ]   1.07G  11.9MB/s    eta 28s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-5200-count.jsonl\n"
+      "\r",
+      "h                    75%[==============>     ]   1.08G  11.9MB/s    eta 28s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-5300-count.jsonl\n"
+      "\r",
+      "                     75%[==============>     ]   1.08G  11.9MB/s    eta 28s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-5400-count.jsonl\n"
+      "\r",
+      "                  v  75%[==============>     ]   1.08G  11.9MB/s    eta 28s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 306K Sep 13 21:43 shuffle-word-55-count.jsonl\n"
+      "\r",
+      "                 v5  75%[==============>     ]   1.08G  12.0MB/s    eta 27s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-5500-count.jsonl\n"
+      "\r",
+      "                v5r  75%[==============>     ]   1.09G  12.0MB/s    eta 27s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-5600-count.jsonl\n"
+      "\r",
+      "               v5r3  76%[==============>     ]   1.09G  11.9MB/s    eta 27s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-5700-count.jsonl\n"
+      "\r",
+      "              v5r3-  76%[==============>     ]   1.09G  12.2MB/s    eta 27s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-5800-count.jsonl\n"
+      "\r",
+      "             v5r3-L  76%[==============>     ]   1.10G  12.4MB/s    eta 27s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-5900-count.jsonl\n"
+      "\r",
+      "            v5r3-L6  76%[==============>     ]   1.10G  12.5MB/s    eta 25s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 300K Sep 13 21:43 shuffle-word-60-count.jsonl\n"
+      "\r",
+      "           v5r3-L6-  77%[==============>     ]   1.10G  12.4MB/s    eta 25s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 528K Sep 13 21:43 shuffle-word-600-count.jsonl\n"
+      "\r",
+      "          v5r3-L6-D  77%[==============>     ]   1.11G  12.3MB/s    eta 25s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-6000-count.jsonl\n"
+      "\r",
+      "         v5r3-L6-D2  77%[==============>     ]   1.11G  12.7MB/s    eta 25s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep 13 21:43 shuffle-word-6100-count.jsonl\n"
+      "\r",
+      "        v5r3-L6-D25  77%[==============>     ]   1.11G  12.9MB/s    eta 25s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-6200-count.jsonl\n"
+      "\r",
+      "       v5r3-L6-D256  77%[==============>     ]   1.12G  13.0MB/s    eta 24s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-6300-count.jsonl\n"
+      "\r",
+      "      v5r3-L6-D2560  78%[==============>     ]   1.12G  12.9MB/s    eta 24s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-6400-count.jsonl\n"
+      "\r",
+      "     v5r3-L6-D2560-  78%[==============>     ]   1.12G  13.1MB/s    eta 24s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 299K Sep 13 21:43 shuffle-word-65-count.jsonl\n"
+      "\r",
+      "    v5r3-L6-D2560-E  78%[==============>     ]   1.12G  13.2MB/s    eta 24s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep 13 21:43 shuffle-word-6500-count.jsonl\n"
+      "\r",
+      "   v5r3-L6-D2560-E0  78%[==============>     ]   1.13G  13.1MB/s    eta 24s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep 13 21:43 shuffle-word-6600-count.jsonl\n"
+      "\r",
+      "  v5r3-L6-D2560-E0_  79%[==============>     ]   1.13G  13.5MB/s    eta 23s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-6700-count.jsonl\n"
+      "\r",
+      " v5r3-L6-D2560-E0_1  79%[==============>     ]   1.14G  13.7MB/s    eta 23s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-6800-count.jsonl\n"
+      "\r",
+      "v5r3-L6-D2560-E0_1-  79%[==============>     ]   1.14G  13.9MB/s    eta 23s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-6900-count.jsonl\n"
+      "\r",
+      "5r3-L6-D2560-E0_1-m  79%[==============>     ]   1.14G  14.1MB/s    eta 23s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 291K Sep 13 21:43 shuffle-word-70-count.jsonl\n"
+      "\r",
+      "r3-L6-D2560-E0_1-me  80%[===============>    ]   1.15G  14.3MB/s    eta 23s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 525K Sep 13 21:43 shuffle-word-700-count.jsonl\n"
+      "\r",
+      "3-L6-D2560-E0_1-mem  80%[===============>    ]   1.15G  14.4MB/s    eta 21s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-7000-count.jsonl\n"
+      "\r",
+      "-L6-D2560-E0_1-mem-  80%[===============>    ]   1.15G  14.5MB/s    eta 21s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-7100-count.jsonl\n"
+      "\r",
+      "L6-D2560-E0_1-mem-c  80%[===============>    ]   1.16G  14.7MB/s    eta 21s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-7200-count.jsonl\n"
+      "\r",
+      "6-D2560-E0_1-mem-ct  81%[===============>    ]   1.16G  14.9MB/s    eta 21s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-7300-count.jsonl\n"
+      "\r",
+      "-D2560-E0_1-mem-ctx  81%[===============>    ]   1.17G  15.0MB/s    eta 21s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-7400-count.jsonl\n"
+      "\r",
+      "D2560-E0_1-mem-ctx-  81%[===============>    ]   1.17G  15.2MB/s    eta 20s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 289K Sep 13 21:43 shuffle-word-75-count.jsonl\n"
+      "\r",
+      "2560-E0_1-mem-ctx-5  81%[===============>    ]   1.17G  15.3MB/s    eta 20s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-7500-count.jsonl\n"
+      "\r",
+      "560-E0_1-mem-ctx-51  82%[===============>    ]   1.18G  15.4MB/s    eta 20s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-7600-count.jsonl\n"
+      "\r",
+      "60-E0_1-mem-ctx-512  82%[===============>    ]   1.18G  15.5MB/s    eta 20s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep 13 21:43 shuffle-word-7700-count.jsonl\n"
+      "\r",
+      "0-E0_1-mem-ctx-512.  82%[===============>    ]   1.18G  15.7MB/s    eta 20s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep 13 21:43 shuffle-word-7800-count.jsonl\n"
+      "\r",
+      "-E0_1-mem-ctx-512.p  82%[===============>    ]   1.19G  15.8MB/s    eta 19s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep 13 21:43 shuffle-word-7900-count.jsonl\n"
+      "\r",
+      "E0_1-mem-ctx-512.pt  83%[===============>    ]   1.19G  16.1MB/s    eta 19s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 288K Sep 13 21:43 shuffle-word-80-count.jsonl\n"
+      "\r",
+      "0_1-mem-ctx-512.pth  83%[===============>    ]   1.19G  16.2MB/s    eta 19s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 527K Sep 13 21:43 shuffle-word-800-count.jsonl\n"
+      "\r",
+      "_1-mem-ctx-512.pth   83%[===============>    ]   1.20G  16.3MB/s    eta 19s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep 13 21:43 shuffle-word-8000-count.jsonl\n"
+      "\r",
+      "1-mem-ctx-512.pth    83%[===============>    ]   1.20G  16.4MB/s    eta 19s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 287K Sep 13 21:43 shuffle-word-85-count.jsonl\n"
+      "\r",
+      "-mem-ctx-512.pth     84%[===============>    ]   1.21G  16.4MB/s    eta 17s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 288K Sep 13 21:43 shuffle-word-90-count.jsonl\n"
+      "\r",
+      "mem-ctx-512.pth      84%[===============>    ]   1.21G  16.4MB/s    eta 17s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 524K Sep 13 21:43 shuffle-word-900-count.jsonl\n"
+      "\r",
+      "em-ctx-512.pth       84%[===============>    ]   1.21G  16.4MB/s    eta 17s    "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 288K Sep 13 21:43 shuffle-word-95-count.jsonl\n"
+      "\r",
+      "m-ctx-512.pth        84%[===============>    ]   1.22G  16.4MB/s    eta 17s    "
      ]
-    }
-   ],
-   "source": [
-    "%%script bash\n",
-    "\n",
-    "########################################\n",
-    "# Generate the required jsonl dataset\n",
-    "########################################\n",
-    "\n",
-    "# Go to config dir\n",
-    "cd \"../\"\n",
-    "\n",
-    "# Reset the dataset dir\n",
-    "mkdir -p ../dataset\n",
-    "rm -rf ../dataset/*.jsonl\n",
-    "\n",
-    "# Generate the various datasets\n",
-    "echo \"## Generating word reptition dataset ##\"\n",
-    "\n",
-    "#\n",
-    "# We reduce the training set for < 50 words - and shift the focus upwards\n",
-    "# (aka 50-100 token * 2 : ~100 - 250 token ctx len)\n",
-    "#\n",
-    "for i in {5..100..5} \n",
-    "do\n",
-    "    python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 100 & \n",
-    "    python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 10 & \n",
-    "done\n",
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-ctx-512.pth         85%[================>   ]   1.22G  16.5MB/s    eta 17s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "ctx-512.pth          85%[================>   ]   1.22G  16.5MB/s    eta 16s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "tx-512.pth           85%[================>   ]   1.23G  16.5MB/s    eta 16s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "x-512.pth            86%[================>   ]   1.23G  16.5MB/s    eta 16s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-512.pth             86%[================>   ]   1.24G  16.5MB/s    eta 16s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "512.pth              86%[================>   ]   1.24G  16.5MB/s    eta 16s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "12.pth               86%[================>   ]   1.24G  16.5MB/s    eta 14s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "2.pth                87%[================>   ]   1.25G  16.5MB/s    eta 14s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      ".pth                 87%[================>   ]   1.25G  16.5MB/s    eta 14s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "pth                  87%[================>   ]   1.25G  16.5MB/s    eta 14s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "th                   87%[================>   ]   1.26G  16.6MB/s    eta 14s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "h                    88%[================>   ]   1.26G  16.5MB/s    eta 13s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                     88%[================>   ]   1.26G  16.5MB/s    eta 13s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                  v  88%[================>   ]   1.27G  16.5MB/s    eta 13s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                 v5  88%[================>   ]   1.27G  16.5MB/s    eta 13s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                v5r  89%[================>   ]   1.28G  16.5MB/s    eta 13s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "               v5r3  89%[================>   ]   1.28G  16.5MB/s    eta 11s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "              v5r3-  89%[================>   ]   1.28G  16.5MB/s    eta 11s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "             v5r3-L  89%[================>   ]   1.28G  16.0MB/s    eta 11s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "            v5r3-L6  89%[================>   ]   1.29G  15.8MB/s    eta 11s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "           v5r3-L6-  90%[=================>  ]   1.29G  15.6MB/s    eta 11s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "          v5r3-L6-D  90%[=================>  ]   1.29G  15.5MB/s    eta 10s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "         v5r3-L6-D2  90%[=================>  ]   1.30G  15.3MB/s    eta 10s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "        v5r3-L6-D25  90%[=================>  ]   1.30G  15.2MB/s    eta 10s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "       v5r3-L6-D256  90%[=================>  ]   1.30G  15.1MB/s    eta 10s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "      v5r3-L6-D2560  91%[=================>  ]   1.31G  15.0MB/s    eta 10s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "     v5r3-L6-D2560-  91%[=================>  ]   1.31G  14.9MB/s    eta 9s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "    v5r3-L6-D2560-E  91%[=================>  ]   1.31G  14.8MB/s    eta 9s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "   v5r3-L6-D2560-E0  91%[=================>  ]   1.32G  14.7MB/s    eta 9s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "  v5r3-L6-D2560-E0_  92%[=================>  ]   1.32G  14.6MB/s    eta 9s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      " v5r3-L6-D2560-E0_1  92%[=================>  ]   1.32G  14.6MB/s    eta 9s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "v5r3-L6-D2560-E0_1-  92%[=================>  ]   1.33G  14.3MB/s    eta 8s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "5r3-L6-D2560-E0_1-m  92%[=================>  ]   1.33G  14.3MB/s    eta 8s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "r3-L6-D2560-E0_1-me  93%[=================>  ]   1.33G  14.3MB/s    eta 8s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "3-L6-D2560-E0_1-mem  93%[=================>  ]   1.34G  14.3MB/s    eta 8s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-L6-D2560-E0_1-mem-  93%[=================>  ]   1.34G  14.2MB/s    eta 8s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "L6-D2560-E0_1-mem-c  93%[=================>  ]   1.34G  14.2MB/s    eta 7s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "6-D2560-E0_1-mem-ct  94%[=================>  ]   1.35G  14.2MB/s    eta 7s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-D2560-E0_1-mem-ctx  94%[=================>  ]   1.35G  14.8MB/s    eta 7s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "D2560-E0_1-mem-ctx-  94%[=================>  ]   1.35G  15.0MB/s    eta 7s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "2560-E0_1-mem-ctx-5  94%[=================>  ]   1.36G  15.2MB/s    eta 7s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "560-E0_1-mem-ctx-51  95%[==================> ]   1.36G  15.3MB/s    eta 5s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "60-E0_1-mem-ctx-512  95%[==================> ]   1.37G  15.4MB/s    eta 5s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0-E0_1-mem-ctx-512.  95%[==================> ]   1.37G  15.5MB/s    eta 5s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-E0_1-mem-ctx-512.p  95%[==================> ]   1.37G  15.7MB/s    eta 5s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "E0_1-mem-ctx-512.pt  96%[==================> ]   1.38G  15.8MB/s    eta 5s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0_1-mem-ctx-512.pth  96%[==================> ]   1.38G  15.9MB/s    eta 4s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "_1-mem-ctx-512.pth   96%[==================> ]   1.38G  16.0MB/s    eta 4s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "1-mem-ctx-512.pth    96%[==================> ]   1.39G  16.0MB/s    eta 4s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-mem-ctx-512.pth     97%[==================> ]   1.39G  16.2MB/s    eta 4s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "mem-ctx-512.pth      97%[==================> ]   1.40G  16.2MB/s    eta 4s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "em-ctx-512.pth       97%[==================> ]   1.40G  16.4MB/s    eta 2s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "m-ctx-512.pth        97%[==================> ]   1.40G  16.5MB/s    eta 2s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-ctx-512.pth         98%[==================> ]   1.41G  16.5MB/s    eta 2s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "ctx-512.pth          98%[==================> ]   1.41G  16.5MB/s    eta 2s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "tx-512.pth           98%[==================> ]   1.41G  16.5MB/s    eta 2s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "x-512.pth            99%[==================> ]   1.42G  16.6MB/s    eta 1s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-512.pth             99%[==================> ]   1.42G  16.5MB/s    eta 1s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "512.pth              99%[==================> ]   1.42G  16.5MB/s    eta 1s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "12.pth               99%[==================> ]   1.43G  16.5MB/s    eta 1s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "v5r3-L6-D2560-E0_1- 100%[===================>]   1.43G  17.1MB/s    in 1m 46s  \r\n",
+      "\r\n",
+      "2023-09-14 00:23:39 (13.9 MB/s) - ‘v5r3-L6-D2560-E0_1-mem-ctx-512.pth’ saved [1537632513/1537632513]\r\n",
+      "\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "total 1.5G\r\n",
+      "drwxr-xr-x  2 root root    3 Sep 14 00:21 .\r\n",
+      "drwxr-xr-x 20 root root   24 Sep 14 00:21 ..\r\n",
+      "-rw-r--r--  1 root root 1.5G Sep 13 12:45 v5r3-L6-D2560-E0_1-mem-ctx-512.pth\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Download the model directly (stop gap till HF sync issues are resolved)\n",
+    "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n",
+    "    wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/{DIR_NAME}/{FILENAME_PREFIX}-mem-ctx-512.pth\"\n",
+    "\n",
+    "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n",
+    "    ls -alh ."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "44993c1b",
+   "metadata": {
+    "papermill": {
+     "duration": 0.040583,
+     "end_time": "2023-09-14T00:23:39.603420",
+     "exception": false,
+     "start_time": "2023-09-14T00:23:39.562837",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "## Tune 3 : Ramping up the ctx size (8192), memory training\n",
+    "\n",
+    "- Tune 3: Large ctx size (8192), Scaling up!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "3d8f956f",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-09-14T00:23:39.641463Z",
+     "iopub.status.busy": "2023-09-14T00:23:39.641175Z",
+     "iopub.status.idle": "2023-09-14T00:24:02.625663Z",
+     "shell.execute_reply": "2023-09-14T00:24:02.625201Z"
+    },
+    "papermill": {
+     "duration": 23.023454,
+     "end_time": "2023-09-14T00:24:02.645057",
+     "exception": false,
+     "start_time": "2023-09-14T00:23:39.621603",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "## Generating word reptition dataset ##\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 10 max words, 100 samples - at ../dataset/gen-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 30 max words, 100 samples - at ../dataset/gen-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 20 max words, 100 samples - at ../dataset/gen-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5 max words, 100 samples - at ../dataset/gen-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 15 max words, 100 samples - at ../dataset/gen-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 35 max words, 100 samples - at ../dataset/gen-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 25 max words, 100 samples - at ../dataset/gen-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 70 max words, 100 samples - at ../dataset/gen-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 80 max words, 100 samples - at ../dataset/gen-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 90 max words, 100 samples - at ../dataset/gen-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 55 max words, 100 samples - at ../dataset/gen-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 60 max words, 100 samples - at ../dataset/gen-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 45 max words, 100 samples - at ../dataset/gen-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 65 max words, 100 samples - at ../dataset/gen-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 100 max words, 100 samples - at ../dataset/gen-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 75 max words, 100 samples - at ../dataset/gen-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 50 max words, 100 samples - at ../dataset/gen-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 85 max words, 100 samples - at ../dataset/gen-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 95 max words, 100 samples - at ../dataset/gen-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 357 samples (10 token repeat) - 75 max words - at ../dataset/shuffle-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 328 samples (10 token repeat) - 80 max words - at ../dataset/shuffle-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 277 samples (10 token repeat) - 95 max words - at ../dataset/shuffle-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 383 samples (10 token repeat) - 70 max words - at ../dataset/shuffle-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 300 samples (10 token repeat) - 90 max words - at ../dataset/shuffle-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 657 samples (10 token repeat) - 40 max words - at ../dataset/shuffle-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 746 samples (10 token repeat) - 35 max words - at ../dataset/shuffle-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 479 samples (10 token repeat) - 55 max words - at ../dataset/shuffle-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 438 samples (10 token repeat) - 60 max words - at ../dataset/shuffle-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 1057 samples (10 token repeat) - 25 max words - at ../dataset/shuffle-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 586 samples (10 token repeat) - 45 max words - at ../dataset/shuffle-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 1300 samples (10 token repeat) - 20 max words - at ../dataset/shuffle-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 317 samples (10 token repeat) - 85 max words - at ../dataset/shuffle-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 527 samples (10 token repeat) - 50 max words - at ../dataset/shuffle-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 873 samples (10 token repeat) - 30 max words - at ../dataset/shuffle-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 262 samples (10 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 117 samples (20 token repeat) - 500 max words - at ../dataset/shuffle-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 180 samples (20 token repeat) - 300 max words - at ../dataset/shuffle-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 410 samples (10 token repeat) - 65 max words - at ../dataset/shuffle-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 79 samples (20 token repeat) - 800 max words - at ../dataset/shuffle-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6600 max words - at ../dataset/shuffle-word-6600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1100 max words - at ../dataset/shuffle-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 63 samples (20 token repeat) - 900 max words - at ../dataset/shuffle-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3000 max words - at ../dataset/shuffle-word-3000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 40 max words, 100 samples - at ../dataset/gen-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3600 max words - at ../dataset/shuffle-word-3600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 524 samples (20 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3300 max words - at ../dataset/shuffle-word-3300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 1765 samples (10 token repeat) - 15 max words - at ../dataset/shuffle-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 2594 samples (10 token repeat) - 10 max words - at ../dataset/shuffle-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7400 max words - at ../dataset/shuffle-word-7400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 270 samples (20 token repeat) - 200 max words - at ../dataset/shuffle-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1500 max words - at ../dataset/shuffle-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 700 max words - at ../dataset/shuffle-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3400 max words - at ../dataset/shuffle-word-3400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1200 max words - at ../dataset/shuffle-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1600 max words - at ../dataset/shuffle-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 99 samples (20 token repeat) - 600 max words - at ../dataset/shuffle-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2300 max words - at ../dataset/shuffle-word-2300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3700 max words - at ../dataset/shuffle-word-3700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2200 max words - at ../dataset/shuffle-word-2200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1000 max words - at ../dataset/shuffle-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 139 samples (20 token repeat) - 400 max words - at ../dataset/shuffle-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2800 max words - at ../dataset/shuffle-word-2800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7800 max words - at ../dataset/shuffle-word-7800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3100 max words - at ../dataset/shuffle-word-3100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1900 max words - at ../dataset/shuffle-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3200 max words - at ../dataset/shuffle-word-3200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2000 max words - at ../dataset/shuffle-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1800 max words - at ../dataset/shuffle-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6000 max words - at ../dataset/shuffle-word-6000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6700 max words - at ../dataset/shuffle-word-6700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2900 max words - at ../dataset/shuffle-word-2900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5400 max words - at ../dataset/shuffle-word-5400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5900 max words - at ../dataset/shuffle-word-5900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2100 max words - at ../dataset/shuffle-word-2100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4100 max words - at ../dataset/shuffle-word-4100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5700 max words - at ../dataset/shuffle-word-5700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6100 max words - at ../dataset/shuffle-word-6100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3500 max words - at ../dataset/shuffle-word-3500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5600 max words - at ../dataset/shuffle-word-5600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 27 samples (20 token repeat) - 2600 max words - at ../dataset/shuffle-word-2600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 55 samples (20 token repeat) - 1300 max words - at ../dataset/shuffle-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6300 max words - at ../dataset/shuffle-word-6300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4800 max words - at ../dataset/shuffle-word-4800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1700 max words - at ../dataset/shuffle-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4400 max words - at ../dataset/shuffle-word-4400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4300 max words - at ../dataset/shuffle-word-4300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6800 max words - at ../dataset/shuffle-word-6800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5000 max words - at ../dataset/shuffle-word-5000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5500 max words - at ../dataset/shuffle-word-5500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7100 max words - at ../dataset/shuffle-word-7100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5100 max words - at ../dataset/shuffle-word-5100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4000 max words - at ../dataset/shuffle-word-4000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3900 max words - at ../dataset/shuffle-word-3900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7600 max words - at ../dataset/shuffle-word-7600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6200 max words - at ../dataset/shuffle-word-6200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5200 max words - at ../dataset/shuffle-word-5200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 25 samples (20 token repeat) - 2700 max words - at ../dataset/shuffle-word-2700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4600 max words - at ../dataset/shuffle-word-4600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4900 max words - at ../dataset/shuffle-word-4900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7700 max words - at ../dataset/shuffle-word-7700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6400 max words - at ../dataset/shuffle-word-6400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7900 max words - at ../dataset/shuffle-word-7900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5800 max words - at ../dataset/shuffle-word-5800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7500 max words - at ../dataset/shuffle-word-7500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7200 max words - at ../dataset/shuffle-word-7200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1400 max words - at ../dataset/shuffle-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 39 samples (20 token repeat) - 2500 max words - at ../dataset/shuffle-word-2500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3800 max words - at ../dataset/shuffle-word-3800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2400 max words - at ../dataset/shuffle-word-2400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 8000 max words - at ../dataset/shuffle-word-8000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7000 max words - at ../dataset/shuffle-word-7000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5535 samples (10 token repeat) - 5 max words - at ../dataset/shuffle-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4200 max words - at ../dataset/shuffle-word-4200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4500 max words - at ../dataset/shuffle-word-4500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4700 max words - at ../dataset/shuffle-word-4700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6500 max words - at ../dataset/shuffle-word-6500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7300 max words - at ../dataset/shuffle-word-7300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5300 max words - at ../dataset/shuffle-word-5300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6900 max words - at ../dataset/shuffle-word-6900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 100 max words, 2000 samples - at ../dataset/gen-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 200 max words, 2000 samples - at ../dataset/gen-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 300 max words, 2000 samples - at ../dataset/gen-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 500 max words, 2000 samples - at ../dataset/gen-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 400 max words, 2000 samples - at ../dataset/gen-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 700 max words, 2000 samples - at ../dataset/gen-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 600 max words, 2000 samples - at ../dataset/gen-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1000 max words, 2000 samples - at ../dataset/gen-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1800 max words, 2000 samples - at ../dataset/gen-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 800 max words, 2000 samples - at ../dataset/gen-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 900 max words, 2000 samples - at ../dataset/gen-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1300 max words, 2000 samples - at ../dataset/gen-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1200 max words, 2000 samples - at ../dataset/gen-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1100 max words, 2000 samples - at ../dataset/gen-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1500 max words, 2000 samples - at ../dataset/gen-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1400 max words, 2000 samples - at ../dataset/gen-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2000 max words, 2000 samples - at ../dataset/gen-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1700 max words, 2000 samples - at ../dataset/gen-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1600 max words, 2000 samples - at ../dataset/gen-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2200 max words, 2000 samples - at ../dataset/gen-word-2200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2100 max words, 2000 samples - at ../dataset/gen-word-2100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1900 max words, 2000 samples - at ../dataset/gen-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3000 max words, 2000 samples - at ../dataset/gen-word-3000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2400 max words, 2000 samples - at ../dataset/gen-word-2400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2500 max words, 2000 samples - at ../dataset/gen-word-2500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2700 max words, 2000 samples - at ../dataset/gen-word-2700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2300 max words, 2000 samples - at ../dataset/gen-word-2300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2800 max words, 2000 samples - at ../dataset/gen-word-2800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2600 max words, 2000 samples - at ../dataset/gen-word-2600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2900 max words, 2000 samples - at ../dataset/gen-word-2900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4100 max words, 2000 samples - at ../dataset/gen-word-4100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4000 max words, 2000 samples - at ../dataset/gen-word-4000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3600 max words, 2000 samples - at ../dataset/gen-word-3600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3100 max words, 2000 samples - at ../dataset/gen-word-3100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3400 max words, 2000 samples - at ../dataset/gen-word-3400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3800 max words, 2000 samples - at ../dataset/gen-word-3800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3500 max words, 2000 samples - at ../dataset/gen-word-3500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3300 max words, 2000 samples - at ../dataset/gen-word-3300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3200 max words, 2000 samples - at ../dataset/gen-word-3200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3700 max words, 2000 samples - at ../dataset/gen-word-3700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3900 max words, 2000 samples - at ../dataset/gen-word-3900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4200 max words, 2000 samples - at ../dataset/gen-word-4200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4600 max words, 2000 samples - at ../dataset/gen-word-4600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5600 max words, 2000 samples - at ../dataset/gen-word-5600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6100 max words, 2000 samples - at ../dataset/gen-word-6100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4300 max words, 2000 samples - at ../dataset/gen-word-4300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6200 max words, 2000 samples - at ../dataset/gen-word-6200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4500 max words, 2000 samples - at ../dataset/gen-word-4500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5200 max words, 2000 samples - at ../dataset/gen-word-5200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4400 max words, 2000 samples - at ../dataset/gen-word-4400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4800 max words, 2000 samples - at ../dataset/gen-word-4800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5500 max words, 2000 samples - at ../dataset/gen-word-5500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5100 max words, 2000 samples - at ../dataset/gen-word-5100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4700 max words, 2000 samples - at ../dataset/gen-word-4700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6000 max words, 2000 samples - at ../dataset/gen-word-6000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5000 max words, 2000 samples - at ../dataset/gen-word-5000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4900 max words, 2000 samples - at ../dataset/gen-word-4900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5400 max words, 2000 samples - at ../dataset/gen-word-5400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5800 max words, 2000 samples - at ../dataset/gen-word-5800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5300 max words, 2000 samples - at ../dataset/gen-word-5300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5700 max words, 2000 samples - at ../dataset/gen-word-5700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7400 max words, 2000 samples - at ../dataset/gen-word-7400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6900 max words, 2000 samples - at ../dataset/gen-word-6900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6300 max words, 2000 samples - at ../dataset/gen-word-6300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5900 max words, 2000 samples - at ../dataset/gen-word-5900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7900 max words, 2000 samples - at ../dataset/gen-word-7900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6600 max words, 2000 samples - at ../dataset/gen-word-6600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6400 max words, 2000 samples - at ../dataset/gen-word-6400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7200 max words, 2000 samples - at ../dataset/gen-word-7200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6800 max words, 2000 samples - at ../dataset/gen-word-6800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6700 max words, 2000 samples - at ../dataset/gen-word-6700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6500 max words, 2000 samples - at ../dataset/gen-word-6500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7800 max words, 2000 samples - at ../dataset/gen-word-7800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7100 max words, 2000 samples - at ../dataset/gen-word-7100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7000 max words, 2000 samples - at ../dataset/gen-word-7000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 8000 max words, 2000 samples - at ../dataset/gen-word-8000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7600 max words, 2000 samples - at ../dataset/gen-word-7600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7500 max words, 2000 samples - at ../dataset/gen-word-7500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7300 max words, 2000 samples - at ../dataset/gen-word-7300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7700 max words, 2000 samples - at ../dataset/gen-word-7700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "## Done ##\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "total 2.2G\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  20K Sep 14 00:23 gen-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 2.1M Sep 14 00:23 gen-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  20M Sep 14 00:23 gen-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  22M Sep 14 00:23 gen-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  23M Sep 14 00:23 gen-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  25M Sep 14 00:23 gen-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27M Sep 14 00:23 gen-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  25K Sep 14 00:23 gen-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  29M Sep 14 00:23 gen-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  31M Sep 14 00:23 gen-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  33M Sep 14 00:23 gen-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  35M Sep 14 00:23 gen-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  37M Sep 14 00:23 gen-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  30K Sep 14 00:23 gen-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 4.0M Sep 14 00:23 gen-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  39M Sep 14 00:23 gen-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  41M Sep 14 00:23 gen-word-2100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  42M Sep 14 00:23 gen-word-2200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  44M Sep 14 00:23 gen-word-2300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  46M Sep 14 00:23 gen-word-2400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  34K Sep 14 00:23 gen-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  48M Sep 14 00:23 gen-word-2500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  50M Sep 14 00:23 gen-word-2600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  52M Sep 14 00:23 gen-word-2700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  54M Sep 14 00:23 gen-word-2800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  56M Sep 14 00:23 gen-word-2900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  40K Sep 14 00:23 gen-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 5.9M Sep 14 00:23 gen-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  58M Sep 14 00:23 gen-word-3000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  60M Sep 14 00:23 gen-word-3100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  61M Sep 14 00:23 gen-word-3200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  63M Sep 14 00:23 gen-word-3300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  65M Sep 14 00:23 gen-word-3400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  44K Sep 14 00:23 gen-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  67M Sep 14 00:23 gen-word-3500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  69M Sep 14 00:23 gen-word-3600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  71M Sep 14 00:23 gen-word-3700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  73M Sep 14 00:23 gen-word-3800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  75M Sep 14 00:23 gen-word-3900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  49K Sep 14 00:23 gen-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 7.9M Sep 14 00:23 gen-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  77M Sep 14 00:23 gen-word-4000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  79M Sep 14 00:23 gen-word-4100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  80M Sep 14 00:23 gen-word-4200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  82M Sep 14 00:23 gen-word-4300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  84M Sep 14 00:23 gen-word-4400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  54K Sep 14 00:23 gen-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  86M Sep 14 00:23 gen-word-4500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  88M Sep 14 00:23 gen-word-4600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  90M Sep 14 00:23 gen-word-4700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  92M Sep 14 00:23 gen-word-4800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  94M Sep 14 00:23 gen-word-4900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  15K Sep 14 00:23 gen-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  59K Sep 14 00:23 gen-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 9.7M Sep 14 00:23 gen-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  96M Sep 14 00:23 gen-word-5000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  98M Sep 14 00:23 gen-word-5100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  99M Sep 14 00:23 gen-word-5200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 101M Sep 14 00:24 gen-word-5300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 103M Sep 14 00:23 gen-word-5400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  64K Sep 14 00:23 gen-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 105M Sep 14 00:23 gen-word-5500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 107M Sep 14 00:23 gen-word-5600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 109M Sep 14 00:24 gen-word-5700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 111M Sep 14 00:24 gen-word-5800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 113M Sep 14 00:24 gen-word-5900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  68K Sep 14 00:23 gen-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  12M Sep 14 00:23 gen-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 115M Sep 14 00:23 gen-word-6000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 117M Sep 14 00:23 gen-word-6100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 118M Sep 14 00:23 gen-word-6200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 120M Sep 14 00:24 gen-word-6300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 122M Sep 14 00:24 gen-word-6400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  73K Sep 14 00:23 gen-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 124M Sep 14 00:24 gen-word-6500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 126M Sep 14 00:24 gen-word-6600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 128M Sep 14 00:24 gen-word-6700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 130M Sep 14 00:24 gen-word-6800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 132M Sep 14 00:24 gen-word-6900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  77K Sep 14 00:23 gen-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  14M Sep 14 00:23 gen-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 134M Sep 14 00:24 gen-word-7000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 136M Sep 14 00:24 gen-word-7100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 137M Sep 14 00:24 gen-word-7200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 139M Sep 14 00:24 gen-word-7300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 141M Sep 14 00:24 gen-word-7400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  84K Sep 14 00:23 gen-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 143M Sep 14 00:24 gen-word-7500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 145M Sep 14 00:24 gen-word-7600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 147M Sep 14 00:24 gen-word-7700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 149M Sep 14 00:24 gen-word-7800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 151M Sep 14 00:24 gen-word-7900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  89K Sep 14 00:23 gen-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  16M Sep 14 00:23 gen-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 153M Sep 14 00:24 gen-word-8000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  93K Sep 14 00:23 gen-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  98K Sep 14 00:23 gen-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  18M Sep 14 00:23 gen-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 104K Sep 14 00:23 gen-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 500K Sep 14 00:23 shuffle-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 567K Sep 14 00:23 shuffle-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 522K Sep 14 00:23 shuffle-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 520K Sep 14 00:23 shuffle-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 523K Sep 14 00:23 shuffle-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 522K Sep 14 00:23 shuffle-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 518K Sep 14 00:23 shuffle-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 427K Sep 14 00:23 shuffle-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 522K Sep 14 00:23 shuffle-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 522K Sep 14 00:23 shuffle-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 517K Sep 14 00:23 shuffle-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 519K Sep 14 00:23 shuffle-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 520K Sep 14 00:23 shuffle-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 381K Sep 14 00:23 shuffle-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 543K Sep 14 00:23 shuffle-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 518K Sep 14 00:23 shuffle-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 521K Sep 14 00:23 shuffle-word-2100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 518K Sep 14 00:23 shuffle-word-2200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 522K Sep 14 00:23 shuffle-word-2300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 520K Sep 14 00:23 shuffle-word-2400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 358K Sep 14 00:23 shuffle-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 520K Sep 14 00:23 shuffle-word-2500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 513K Sep 14 00:23 shuffle-word-2600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 512K Sep 14 00:23 shuffle-word-2700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-2800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-2900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 338K Sep 14 00:23 shuffle-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 530K Sep 14 00:23 shuffle-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-3000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-3100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-3200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-3300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-3400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 320K Sep 14 00:23 shuffle-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-3500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-3600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-3700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-3800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-3900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 324K Sep 14 00:23 shuffle-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 530K Sep 14 00:23 shuffle-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-4300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 313K Sep 14 00:23 shuffle-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 791K Sep 14 00:23 shuffle-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 299K Sep 14 00:23 shuffle-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 524K Sep 14 00:23 shuffle-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-5000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-5100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-5200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-5300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-5400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 302K Sep 14 00:23 shuffle-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-5500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-5600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-5700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-5800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-5900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 298K Sep 14 00:23 shuffle-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 525K Sep 14 00:23 shuffle-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-6000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-6100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-6200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-6300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-6400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 299K Sep 14 00:23 shuffle-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-6500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-6600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-6700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-6800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-6900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 297K Sep 14 00:23 shuffle-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 526K Sep 14 00:23 shuffle-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-7000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-7100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-7200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-7300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-7400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 293K Sep 14 00:23 shuffle-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-7500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-7600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-7700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-7800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-7900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 289K Sep 14 00:23 shuffle-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 525K Sep 14 00:23 shuffle-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-8000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 288K Sep 14 00:23 shuffle-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 284K Sep 14 00:23 shuffle-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 522K Sep 14 00:23 shuffle-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 284K Sep 14 00:23 shuffle-word-95-count.jsonl\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%script bash\n",
+    "\n",
+    "########################################\n",
+    "# Generate the required jsonl dataset\n",
+    "########################################\n",
+    "\n",
+    "# Go to config dir\n",
+    "cd \"../\"\n",
+    "\n",
+    "# Reset the dataset dir\n",
+    "mkdir -p ../dataset\n",
+    "rm -rf ../dataset/*.jsonl\n",
+    "\n",
+    "# Generate the various datasets\n",
+    "echo \"## Generating word reptition dataset ##\"\n",
+    "\n",
+    "#\n",
+    "# We reduce the training set for < 50 words - and shift the focus upwards\n",
+    "# (aka 50-100 token * 2 : ~100 - 250 token ctx len)\n",
+    "#\n",
+    "for i in {5..100..5} \n",
+    "do\n",
+    "    python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 100 & \n",
+    "    python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 10 & \n",
+    "done\n",
     "\n",
     "#\n",
     "# Ramping up the 100+ - 4200 words dataset\n",
@@ -3146,106 +6883,610 @@
   {
    "cell_type": "code",
    "execution_count": 6,
-   "id": "fe8bdefe",
+   "id": "701b6753",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-13T21:43:47.086026Z",
-     "iopub.status.busy": "2023-09-13T21:43:47.085626Z",
-     "iopub.status.idle": "2023-09-13T21:44:04.543593Z",
-     "shell.execute_reply": "2023-09-13T21:44:04.542745Z"
+     "iopub.execute_input": "2023-09-14T00:24:02.712544Z",
+     "iopub.status.busy": "2023-09-14T00:24:02.712008Z",
+     "iopub.status.idle": "2023-09-14T00:24:35.560189Z",
+     "shell.execute_reply": "2023-09-14T00:24:35.559394Z"
+    },
+    "papermill": {
+     "duration": 32.883629,
+     "end_time": "2023-09-14T00:24:35.562107",
+     "exception": false,
+     "start_time": "2023-09-14T00:24:02.678478",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2023-09-14 00:24:05,633] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L6-D2560-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L6-D2560-E0_1-mem-ctx-8k/', '--model.lr_init=4e-4', '--model.lr_final=2e-4', '--data.max_token_size=8192', '--data.sort_by_length=True', '--model.ctx_len=4096', '--model.bptt_learning_range=2', '--model.load_model=../model/v5r3-L6-D2560-E0_1-mem-ctx-512.pth'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L6-D2560-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L6-D2560-E0_1-mem-ctx-8k/', '--model.lr_init=4e-4', '--model.lr_final=2e-4', '--data.max_token_size=8192', '--data.sort_by_length=True', '--model.ctx_len=4096', '--model.bptt_learning_range=2', '--model.load_model=../model/v5r3-L6-D2560-E0_1-mem-ctx-512.pth'].\r\n",
+      "  rank_zero_warn(\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 4194784656\r\n",
+      "  rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n",
+      "Global seed set to 4194784656\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.10\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20230914_002408-wrr91tv7\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mv5r3-L6-D2560-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/wrr91tv7\u001b[0m\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.10/dist-packages/lightning/fabric/connector.py:554: UserWarning: bf16 is supported for historical reasons but its usage is discouraged. Please set your precision to bf16-mixed instead!\r\n",
+      "  rank_zero_warn(\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "GPU available: True (cuda), used: True\r\n",
+      "TPU available: False, using: 0 TPU cores\r\n",
+      "IPU available: False, using: 0 IPUs\r\n",
+      "HPU available: False, using: 0 HPUs\r\n",
+      "\r\n",
+      "\r\n",
+      "[RWKV.Trainer] Applying 'target_batch_size' with the following:\r\n",
+      "   - target_batch_size:       256\r\n",
+      "   - num_nodes:               1\r\n",
+      "   - num_devices:             1\r\n",
+      "   - accumulate_grad_batches: 256\r\n",
+      "   - effective_batch_size:    256\r\n",
+      "\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Resolving data files:   0%|                             | 0/198 [00:00<?, ?it/s]\r",
+      "Resolving data files: 100%|███████████████| 198/198 [00:00<00:00, 148883.51it/s]\r\n",
+      "\r",
+      "Downloading data files:   0%|                             | 0/1 [00:00<?, ?it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Downloading data files: 100%|████████████████████| 1/1 [00:00<00:00, 287.97it/s]\r\n",
+      "\r",
+      "Extracting data files:   0%|                              | 0/1 [00:00<?, ?it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Extracting data files: 100%|██████████████████████| 1/1 [00:00<00:00, 12.62it/s]\r\n",
+      "\r",
+      "Generating train split: 0 examples [00:00, ? examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 2625 examples [00:00, 3049.60 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 17014 examples [00:00, 22989.43 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 33146 examples [00:01, 46148.63 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 44997 examples [00:01, 51154.78 examples/s]"
+     ]
     },
-    "papermill": {
-     "duration": 17.477036,
-     "end_time": "2023-09-13T21:44:04.545689",
-     "exception": false,
-     "start_time": "2023-09-13T21:43:47.068653",
-     "status": "completed"
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 55124 examples [00:01, 48517.02 examples/s]"
+     ]
     },
-    "tags": []
-   },
-   "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[2023-09-13 21:43:49,965] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
+      "\r",
+      "Generating train split: 64087 examples [00:01, 35495.81 examples/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
+      "\r",
+      "Generating train split: 70515 examples [00:02, 36667.80 examples/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L12-D2560-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L12-D2560-E0_1-mem-ctx-8k/', '--model.lr_init=4e-4', '--model.lr_final=2e-4', '--data.max_token_size=8192', '--data.sort_by_length=True', '--model.ctx_len=4096', '--model.bptt_learning_range=2', '--model.load_model=../model/v5r3-L12-D2560-E0_1-mem-ctx-512.pth'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L12-D2560-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L12-D2560-E0_1-mem-ctx-8k/', '--model.lr_init=4e-4', '--model.lr_final=2e-4', '--data.max_token_size=8192', '--data.sort_by_length=True', '--model.ctx_len=4096', '--model.bptt_learning_range=2', '--model.load_model=../model/v5r3-L12-D2560-E0_1-mem-ctx-512.pth'].\r\n",
-      "  rank_zero_warn(\r\n"
+      "\r",
+      "Generating train split: 75772 examples [00:02, 30708.14 examples/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 1365345879\r\n",
-      "  rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n",
-      "Global seed set to 1365345879\r\n"
+      "\r",
+      "Generating train split: 80333 examples [00:02, 30088.94 examples/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n"
+      "\r",
+      "Generating train split: 84141 examples [00:02, 27930.07 examples/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.10\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20230913_214352-oqbu77ao\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mv5r3-L12-D2560-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/oqbu77ao\u001b[0m\r\n"
+      "\r",
+      "Generating train split: 87548 examples [00:02, 27984.68 examples/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Traceback (most recent call last):\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 258, in <module>\r\n",
-      "    cli_main()\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 233, in cli_main\r\n",
-      "    LightningCLI(\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 350, in __init__\r\n",
-      "    self.instantiate_classes()\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 499, in instantiate_classes\r\n",
-      "    self.config_init = self.parser.instantiate_classes(self.config)\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n",
-      "    cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1130, in instantiate_classes\r\n",
-      "    cfg[subcommand] = subparser.instantiate_classes(cfg[subcommand], instantiate_groups=instantiate_groups)\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n",
-      "    cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1124, in instantiate_classes\r\n",
-      "    component.instantiate_class(component, cfg)\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_signatures.py\", line 561, in group_instantiate_class\r\n",
-      "    parent[key] = group.group_class(**value)\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 565, in __init__\r\n",
-      "    raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n",
-      "ValueError: load_model file '../model/v5r3-L12-D2560-E0_1-mem-ctx-512.pth' does not exist\r\n"
+      "\r",
+      "Generating train split: 90787 examples [00:02, 26886.31 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 93778 examples [00:03, 27040.43 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 96973 examples [00:03, 27248.89 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 101191 examples [00:03, 30724.11 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 106104 examples [00:03, 34273.50 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 110290 examples [00:03, 34016.09 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 113802 examples [00:03, 32420.58 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 117160 examples [00:03, 22500.32 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 120238 examples [00:04, 21384.63 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 122791 examples [00:04, 21213.85 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 125835 examples [00:04, 23194.87 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 129263 examples [00:04, 24367.39 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Failed to read file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/dataset/shuffle-word-100-count.jsonl' with error <class 'pyarrow.lib.ArrowInvalid'>: JSON parse error: Missing a comma or '}' after an object member. in row 233\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 136606 examples [00:04, 35027.49 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 140455 examples [00:04, 20618.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 143531 examples [00:05, 21445.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 146681 examples [00:05, 22082.33 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 149694 examples [00:05, 22730.77 examples/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "\r",
+      "Generating train split: 152975 examples [00:05, 22489.21 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 155530 examples [00:05, 20086.23 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 158092 examples [00:05, 15943.50 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 160242 examples [00:06, 15481.26 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 162322 examples [00:06, 14098.18 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 164028 examples [00:06, 13989.96 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 165725 examples [00:06, 13009.40 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 167287 examples [00:06, 12674.13 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 169262 examples [00:06, 13937.10 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 170803 examples [00:06, 14075.62 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 172262 examples [00:06, 13051.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 173727 examples [00:07, 12319.32 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 175213 examples [00:07, 12084.54 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 176512 examples [00:07, 11651.88 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 177716 examples [00:07, 11150.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 178902 examples [00:07, 11209.52 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 180206 examples [00:07, 11234.12 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 181684 examples [00:07, 11864.15 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 182992 examples [00:07, 11920.17 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 183622 examples [00:08, 22556.31 examples/s]\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "multiprocess.pool.RemoteTraceback: \r\n",
+      "\"\"\"\r\n",
+      "Traceback (most recent call last):\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/packaged_modules/json/json.py\", line 144, in _generate_tables\r\n",
+      "    dataset = json.load(f)\r\n",
+      "  File \"/usr/lib/python3.10/json/__init__.py\", line 293, in load\r\n",
+      "    return loads(fp.read(),\r\n",
+      "  File \"/usr/lib/python3.10/json/__init__.py\", line 346, in loads\r\n",
+      "    return _default_decoder.decode(s)\r\n",
+      "  File \"/usr/lib/python3.10/json/decoder.py\", line 340, in decode\r\n",
+      "    raise JSONDecodeError(\"Extra data\", s, end)\r\n",
+      "json.decoder.JSONDecodeError: Extra data: line 2 column 1 (char 1121)\r\n",
+      "\r\n",
+      "During handling of the above exception, another exception occurred:\r\n",
+      "\r\n",
+      "Traceback (most recent call last):\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 1925, in _prepare_split_single\r\n",
+      "    for _, table in generator:\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/packaged_modules/json/json.py\", line 147, in _generate_tables\r\n",
+      "    raise e\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/packaged_modules/json/json.py\", line 121, in _generate_tables\r\n",
+      "    pa_table = paj.read_json(\r\n",
+      "  File \"pyarrow/_json.pyx\", line 258, in pyarrow._json.read_json\r\n",
+      "  File \"pyarrow/error.pxi\", line 144, in pyarrow.lib.pyarrow_internal_check_status\r\n",
+      "  File \"pyarrow/error.pxi\", line 100, in pyarrow.lib.check_status\r\n",
+      "pyarrow.lib.ArrowInvalid: JSON parse error: Missing a comma or '}' after an object member. in row 233\r\n",
+      "\r\n",
+      "The above exception was the direct cause of the following exception:\r\n",
+      "\r\n",
+      "Traceback (most recent call last):\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/multiprocess/pool.py\", line 125, in worker\r\n",
+      "    result = (True, func(*args, **kwds))\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py\", line 1347, in _write_generator_to_queue\r\n",
+      "    for i, result in enumerate(func(**kwargs)):\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 1958, in _prepare_split_single\r\n",
+      "    raise DatasetGenerationError(\"An error occurred while generating the dataset\") from e\r\n",
+      "datasets.builder.DatasetGenerationError: An error occurred while generating the dataset\r\n",
+      "\"\"\"\r\n",
+      "\r\n",
+      "The above exception was the direct cause of the following exception:\r\n",
+      "\r\n",
+      "Traceback (most recent call last):\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 258, in <module>\r\n",
+      "    cli_main()\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 233, in cli_main\r\n",
+      "    LightningCLI(\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 353, in __init__\r\n",
+      "    self._run_subcommand(self.subcommand)\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 642, in _run_subcommand\r\n",
+      "    fn(**fn_kwargs)\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 529, in fit\r\n",
+      "    call._call_and_handle_interrupt(\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 41, in _call_and_handle_interrupt\r\n",
+      "    return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/strategies/launchers/subprocess_script.py\", line 91, in launch\r\n",
+      "    return function(*args, **kwargs)\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 568, in _fit_impl\r\n",
+      "    self._run(model, ckpt_path=ckpt_path)\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 925, in _run\r\n",
+      "    self._data_connector.prepare_data()\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py\", line 94, in prepare_data\r\n",
+      "    call._call_lightning_datamodule_hook(trainer, \"prepare_data\")\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 164, in _call_lightning_datamodule_hook\r\n",
+      "    return fn(*args, **kwargs)\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 549, in prepare_data\r\n",
+      "    prepare_data_static(**self._init_locals)\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 101, in prepare_data_static\r\n",
+      "    src_dataset = load_dataset(**load_dataset_params)\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 2136, in load_dataset\r\n",
+      "    builder_instance.download_and_prepare(\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 954, in download_and_prepare\r\n",
+      "    self._download_and_prepare(\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 1049, in _download_and_prepare\r\n",
+      "    self._prepare_split(split_generator, **prepare_split_kwargs)\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 1842, in _prepare_split\r\n",
+      "    for job_id, done, content in iflatmap_unordered(\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py\", line 1387, in iflatmap_unordered\r\n",
+      "    [async_result.get(timeout=0.05) for async_result in async_results]\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py\", line 1387, in <listcomp>\r\n",
+      "    [async_result.get(timeout=0.05) for async_result in async_results]\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/multiprocess/pool.py\", line 774, in get\r\n",
+      "    raise self._value\r\n",
+      "datasets.builder.DatasetGenerationError: An error occurred while generating the dataset\r\n",
       "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n"
      ]
     },
@@ -3253,10 +7494,10 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5r3-L12-D2560-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/oqbu77ao\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjkzMjg5ODA3/version_details/v51\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5r3-L6-D2560-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/wrr91tv7\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjkzMjg5ODA3/version_details/v53\u001b[0m\r\n",
       "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230913_214352-oqbu77ao/logs\u001b[0m\r\n"
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230914_002408-wrr91tv7/logs\u001b[0m\r\n"
      ]
     }
    ],
@@ -3282,19 +7523,19 @@
   {
    "cell_type": "code",
    "execution_count": 7,
-   "id": "338ebb3a",
+   "id": "500c7607",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-13T21:44:04.583165Z",
-     "iopub.status.busy": "2023-09-13T21:44:04.582610Z",
-     "iopub.status.idle": "2023-09-13T21:44:06.987560Z",
-     "shell.execute_reply": "2023-09-13T21:44:06.986803Z"
+     "iopub.execute_input": "2023-09-14T00:24:35.637916Z",
+     "iopub.status.busy": "2023-09-14T00:24:35.637654Z",
+     "iopub.status.idle": "2023-09-14T00:24:38.174650Z",
+     "shell.execute_reply": "2023-09-14T00:24:38.173904Z"
     },
     "papermill": {
-     "duration": 2.423633,
-     "end_time": "2023-09-13T21:44:06.989296",
+     "duration": 2.574032,
+     "end_time": "2023-09-14T00:24:38.176339",
      "exception": false,
-     "start_time": "2023-09-13T21:44:04.565663",
+     "start_time": "2023-09-14T00:24:35.602307",
      "status": "completed"
     },
     "tags": []
@@ -3304,7 +7545,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[2023-09-13 21:44:06,121] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
+      "[2023-09-14 00:24:37,304] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
      ]
     },
     {
@@ -3318,14 +7559,14 @@
       "    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n",
       "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n",
       "    raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n",
-      "ValueError: Unable to find 'latest' file at ../checkpoint/v5r3-L12-D2560-E0_1-mem-ctx-8k/last.ckpt/latest\r\n"
+      "ValueError: Unable to find 'latest' file at ../checkpoint/v5r3-L6-D2560-E0_1-mem-ctx-8k/last.ckpt/latest\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "ls: cannot access '../model/v5r3-L12-D2560-E0_1-mem-ctx-8k.pth': No such file or directory\r\n"
+      "ls: cannot access '../model/v5r3-L6-D2560-E0_1-mem-ctx-8k.pth': No such file or directory\r\n"
      ]
     }
    ],
@@ -3341,19 +7582,19 @@
   {
    "cell_type": "code",
    "execution_count": 8,
-   "id": "9f9af732",
+   "id": "a169a91a",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-13T21:44:07.049788Z",
-     "iopub.status.busy": "2023-09-13T21:44:07.049295Z",
-     "iopub.status.idle": "2023-09-13T21:44:07.282197Z",
-     "shell.execute_reply": "2023-09-13T21:44:07.281400Z"
+     "iopub.execute_input": "2023-09-14T00:24:38.251360Z",
+     "iopub.status.busy": "2023-09-14T00:24:38.251096Z",
+     "iopub.status.idle": "2023-09-14T00:24:38.489940Z",
+     "shell.execute_reply": "2023-09-14T00:24:38.489202Z"
     },
     "papermill": {
-     "duration": 0.252463,
-     "end_time": "2023-09-13T21:44:07.283862",
+     "duration": 0.276215,
+     "end_time": "2023-09-14T00:24:38.491553",
      "exception": false,
-     "start_time": "2023-09-13T21:44:07.031399",
+     "start_time": "2023-09-14T00:24:38.215338",
      "status": "completed"
     },
     "tags": []
@@ -3375,19 +7616,19 @@
   {
    "cell_type": "code",
    "execution_count": 9,
-   "id": "53b57a14",
+   "id": "57ad36b1",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-13T21:44:07.322800Z",
-     "iopub.status.busy": "2023-09-13T21:44:07.322265Z",
-     "iopub.status.idle": "2023-09-13T21:44:07.553994Z",
-     "shell.execute_reply": "2023-09-13T21:44:07.553234Z"
+     "iopub.execute_input": "2023-09-14T00:24:38.567093Z",
+     "iopub.status.busy": "2023-09-14T00:24:38.566558Z",
+     "iopub.status.idle": "2023-09-14T00:24:38.802491Z",
+     "shell.execute_reply": "2023-09-14T00:24:38.801579Z"
     },
     "papermill": {
-     "duration": 0.251029,
-     "end_time": "2023-09-13T21:44:07.555639",
+     "duration": 0.273683,
+     "end_time": "2023-09-14T00:24:38.804465",
      "exception": false,
-     "start_time": "2023-09-13T21:44:07.304610",
+     "start_time": "2023-09-14T00:24:38.530782",
      "status": "completed"
     },
     "tags": []
@@ -3426,14 +7667,14 @@
   },
   "papermill": {
    "default_parameters": {},
-   "duration": 48.916499,
-   "end_time": "2023-09-13T21:44:07.691878",
+   "duration": 171.070576,
+   "end_time": "2023-09-14T00:24:38.962638",
    "environment_variables": {},
    "exception": null,
    "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb",
    "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb",
    "parameters": {},
-   "start_time": "2023-09-13T21:43:18.775379",
+   "start_time": "2023-09-14T00:21:47.892062",
    "version": "2.4.0"
   }
  },