diff --git "a/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E1e-1-ctx4k-part5.ipynb" "b/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E1e-1-ctx4k-part5.ipynb"
--- "a/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E1e-1-ctx4k-part5.ipynb"
+++ "b/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E1e-1-ctx4k-part5.ipynb"
@@ -3,13 +3,13 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "98614751",
+   "id": "0aa470f4",
    "metadata": {
     "papermill": {
-     "duration": 0.002691,
-     "end_time": "2023-09-01T14:16:49.338379",
+     "duration": 0.002959,
+     "end_time": "2023-09-01T14:53:06.162658",
      "exception": false,
-     "start_time": "2023-09-01T14:16:49.335688",
+     "start_time": "2023-09-01T14:53:06.159699",
      "status": "completed"
     },
     "tags": []
@@ -28,13 +28,13 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "52adcd1b",
+   "id": "30b4429d",
    "metadata": {
     "papermill": {
-     "duration": 0.001694,
-     "end_time": "2023-09-01T14:16:49.342111",
+     "duration": 0.001695,
+     "end_time": "2023-09-01T14:53:06.166400",
      "exception": false,
-     "start_time": "2023-09-01T14:16:49.340417",
+     "start_time": "2023-09-01T14:53:06.164705",
      "status": "completed"
     },
     "tags": []
@@ -46,19 +46,19 @@
   {
    "cell_type": "code",
    "execution_count": 1,
-   "id": "7c88d8f2",
+   "id": "b9d8c411",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-01T14:16:49.346569Z",
-     "iopub.status.busy": "2023-09-01T14:16:49.346390Z",
-     "iopub.status.idle": "2023-09-01T14:16:50.066273Z",
-     "shell.execute_reply": "2023-09-01T14:16:50.065401Z"
+     "iopub.execute_input": "2023-09-01T14:53:06.171899Z",
+     "iopub.status.busy": "2023-09-01T14:53:06.171621Z",
+     "iopub.status.idle": "2023-09-01T14:53:06.892025Z",
+     "shell.execute_reply": "2023-09-01T14:53:06.891053Z"
     },
     "papermill": {
-     "duration": 0.724451,
-     "end_time": "2023-09-01T14:16:50.068366",
+     "duration": 0.72586,
+     "end_time": "2023-09-01T14:53:06.894118",
      "exception": false,
-     "start_time": "2023-09-01T14:16:49.343915",
+     "start_time": "2023-09-01T14:53:06.168258",
      "status": "completed"
     },
     "tags": []
@@ -74,19 +74,19 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "id": "42f29f47",
+   "id": "d6a7b76f",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-01T14:16:50.073627Z",
-     "iopub.status.busy": "2023-09-01T14:16:50.073435Z",
-     "iopub.status.idle": "2023-09-01T14:16:52.968362Z",
-     "shell.execute_reply": "2023-09-01T14:16:52.967407Z"
+     "iopub.execute_input": "2023-09-01T14:53:06.899776Z",
+     "iopub.status.busy": "2023-09-01T14:53:06.899528Z",
+     "iopub.status.idle": "2023-09-01T14:53:09.788640Z",
+     "shell.execute_reply": "2023-09-01T14:53:09.787798Z"
     },
     "papermill": {
-     "duration": 2.899701,
-     "end_time": "2023-09-01T14:16:52.970275",
+     "duration": 2.894122,
+     "end_time": "2023-09-01T14:53:09.790308",
      "exception": false,
-     "start_time": "2023-09-01T14:16:50.070574",
+     "start_time": "2023-09-01T14:53:06.896186",
      "status": "completed"
     },
     "tags": []
@@ -118,19 +118,19 @@
   {
    "cell_type": "code",
    "execution_count": 3,
-   "id": "975434e1",
+   "id": "7f700082",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-01T14:16:52.976123Z",
-     "iopub.status.busy": "2023-09-01T14:16:52.975923Z",
-     "iopub.status.idle": "2023-09-01T14:16:52.982702Z",
-     "shell.execute_reply": "2023-09-01T14:16:52.982003Z"
+     "iopub.execute_input": "2023-09-01T14:53:09.796934Z",
+     "iopub.status.busy": "2023-09-01T14:53:09.795855Z",
+     "iopub.status.idle": "2023-09-01T14:53:09.802854Z",
+     "shell.execute_reply": "2023-09-01T14:53:09.802131Z"
     },
     "papermill": {
-     "duration": 0.011228,
-     "end_time": "2023-09-01T14:16:52.984080",
+     "duration": 0.011279,
+     "end_time": "2023-09-01T14:53:09.804026",
      "exception": false,
-     "start_time": "2023-09-01T14:16:52.972852",
+     "start_time": "2023-09-01T14:53:09.792747",
      "status": "completed"
     },
     "tags": []
@@ -193,19 +193,19 @@
   {
    "cell_type": "code",
    "execution_count": 4,
-   "id": "a114c34a",
+   "id": "06ddc114",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-01T14:16:52.989037Z",
-     "iopub.status.busy": "2023-09-01T14:16:52.988868Z",
-     "iopub.status.idle": "2023-09-01T14:16:53.703329Z",
-     "shell.execute_reply": "2023-09-01T14:16:53.702511Z"
+     "iopub.execute_input": "2023-09-01T14:53:09.809533Z",
+     "iopub.status.busy": "2023-09-01T14:53:09.808951Z",
+     "iopub.status.idle": "2023-09-01T14:53:10.832978Z",
+     "shell.execute_reply": "2023-09-01T14:53:10.832125Z"
     },
     "papermill": {
-     "duration": 0.718925,
-     "end_time": "2023-09-01T14:16:53.705113",
+     "duration": 1.028292,
+     "end_time": "2023-09-01T14:53:10.834448",
      "exception": false,
-     "start_time": "2023-09-01T14:16:52.986188",
+     "start_time": "2023-09-01T14:53:09.806156",
      "status": "completed"
     },
     "tags": []
@@ -215,16 +215,22 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "--2023-09-01 14:16:53--  https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E0_1-mem-ctx-4k.pth\r\n",
-      "Resolving huggingface.co (huggingface.co)... "
+      "--2023-09-01 14:53:09--  https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E0_1-mem-ctx-4k.pth\r\n",
+      "Resolving huggingface.co (huggingface.co)... 18.172.134.4, 18.172.134.88, 18.172.134.124, ...\r\n",
+      "Connecting to huggingface.co (huggingface.co)|18.172.134.4|:443... "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "connected.\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "18.165.122.30, 18.165.122.101, 18.165.122.120, ...\r\n",
-      "Connecting to huggingface.co (huggingface.co)|18.165.122.30|:443... connected.\r\n",
       "HTTP request sent, awaiting response... "
      ]
     },
@@ -233,7 +239,7 @@
      "output_type": "stream",
      "text": [
       "404 Not Found\r\n",
-      "2023-09-01 14:16:53 ERROR 404: Not Found.\r\n",
+      "2023-09-01 14:53:10 ERROR 404: Not Found.\r\n",
       "\r\n"
      ]
     },
@@ -242,8 +248,8 @@
      "output_type": "stream",
      "text": [
       "total 4.0K\r\n",
-      "drwxr-xr-x  2 root root   10 Sep  1 14:16 .\r\n",
-      "drwxr-xr-x 19 root root 4.0K Sep  1 14:16 ..\r\n"
+      "drwxr-xr-x  2 root root   10 Sep  1 14:53 .\r\n",
+      "drwxr-xr-x 19 root root 4.0K Sep  1 14:53 ..\r\n"
      ]
     }
    ],
@@ -258,13 +264,13 @@
   },
   {
    "cell_type": "markdown",
-   "id": "7a29f104",
+   "id": "d36a2b7c",
    "metadata": {
     "papermill": {
-     "duration": 0.002139,
-     "end_time": "2023-09-01T14:16:53.709942",
+     "duration": 0.002214,
+     "end_time": "2023-09-01T14:53:10.839328",
      "exception": false,
-     "start_time": "2023-09-01T14:16:53.707803",
+     "start_time": "2023-09-01T14:53:10.837114",
      "status": "completed"
     },
     "tags": []
@@ -278,19 +284,19 @@
   {
    "cell_type": "code",
    "execution_count": 5,
-   "id": "c26a0790",
+   "id": "7a59bf6c",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-01T14:16:53.715565Z",
-     "iopub.status.busy": "2023-09-01T14:16:53.715377Z",
-     "iopub.status.idle": "2023-09-01T14:17:00.686214Z",
-     "shell.execute_reply": "2023-09-01T14:17:00.685338Z"
+     "iopub.execute_input": "2023-09-01T14:53:10.845705Z",
+     "iopub.status.busy": "2023-09-01T14:53:10.844902Z",
+     "iopub.status.idle": "2023-09-01T14:53:17.948023Z",
+     "shell.execute_reply": "2023-09-01T14:53:17.947224Z"
     },
     "papermill": {
-     "duration": 7.05799,
-     "end_time": "2023-09-01T14:17:00.770174",
+     "duration": 7.16737,
+     "end_time": "2023-09-01T14:53:18.008993",
      "exception": false,
-     "start_time": "2023-09-01T14:16:53.712184",
+     "start_time": "2023-09-01T14:53:10.841623",
      "status": "completed"
     },
     "tags": []
@@ -307,133 +313,133 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 174 samples (1 token repeat) - 15 max words - at ../dataset/shuffle-word-15-count.jsonl\n"
+      "Generated a single JSONL file with 88 samples (1 token repeat) - 30 max words - at ../dataset/shuffle-word-30-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 65 samples (1 token repeat) - 40 max words - at ../dataset/shuffle-word-40-count.jsonl\n"
+      "Generated a single JSONL file with 49 samples (1 token repeat) - 50 max words - at ../dataset/shuffle-word-50-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2 max words, 50 samples - at ../dataset/word-2-count.jsonl\n"
+      "Generated JSONL file with - 20 max words, 50 samples - at ../dataset/gen-word-20-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 20 max words, 50 samples - at ../dataset/gen-word-20-count.jsonl\n"
+      "Generated JSONL file with - 2 max words, 50 samples - at ../dataset/word-2-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 10 max words, 50 samples - at ../dataset/gen-word-10-count.jsonl\n"
+      "Generated JSONL file with - 5 max words, 50 samples - at ../dataset/gen-word-5-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 60 max words, 50 samples - at ../dataset/gen-word-60-count.jsonl\n"
+      "Generated JSONL file with - 15 max words, 50 samples - at ../dataset/gen-word-15-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5 max words, 50 samples - at ../dataset/gen-word-5-count.jsonl\n"
+      "Generated JSONL file with - 10 max words, 50 samples - at ../dataset/gen-word-10-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 262 samples (1 token repeat) - 10 max words - at ../dataset/shuffle-word-10-count.jsonl\n"
+      "Generated JSONL file with - 40 max words, 50 samples - at ../dataset/gen-word-40-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 15 max words, 50 samples - at ../dataset/gen-word-15-count.jsonl\n"
+      "Generated JSONL file with - 25 max words, 50 samples - at ../dataset/gen-word-25-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 95 max words, 50 samples - at ../dataset/gen-word-95-count.jsonl\n"
+      "Generated JSONL file with - 35 max words, 50 samples - at ../dataset/gen-word-35-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 561 samples (1 token repeat) - 5 max words - at ../dataset/shuffle-word-5-count.jsonl\n"
+      "Generated JSONL file with - 55 max words, 50 samples - at ../dataset/gen-word-55-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 46 samples (1 token repeat) - 55 max words - at ../dataset/shuffle-word-55-count.jsonl\n"
+      "Generated a single JSONL file with 48 samples (1 token repeat) - 55 max words - at ../dataset/shuffle-word-55-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 35 max words, 50 samples - at ../dataset/gen-word-35-count.jsonl\n"
+      "Generated a single JSONL file with 42 samples (1 token repeat) - 60 max words - at ../dataset/shuffle-word-60-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 43 samples (1 token repeat) - 60 max words - at ../dataset/shuffle-word-60-count.jsonl\n"
+      "Generated a single JSONL file with 31 samples (1 token repeat) - 80 max words - at ../dataset/shuffle-word-80-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 51 samples (1 token repeat) - 50 max words - at ../dataset/shuffle-word-50-count.jsonl\n"
+      "Generated JSONL file with - 45 max words, 50 samples - at ../dataset/gen-word-45-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 73 samples (1 token repeat) - 35 max words - at ../dataset/shuffle-word-35-count.jsonl\n"
+      "Generated a single JSONL file with 108 samples (1 token repeat) - 25 max words - at ../dataset/shuffle-word-25-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 106 samples (1 token repeat) - 25 max words - at ../dataset/shuffle-word-25-count.jsonl\n"
+      "Generated a single JSONL file with 37 samples (1 token repeat) - 70 max words - at ../dataset/shuffle-word-70-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 89 samples (1 token repeat) - 30 max words - at ../dataset/shuffle-word-30-count.jsonl\n"
+      "Generated a single JSONL file with 58 samples (1 token repeat) - 45 max words - at ../dataset/shuffle-word-45-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 39 samples (1 token repeat) - 65 max words - at ../dataset/shuffle-word-65-count.jsonl\n"
+      "Generated JSONL file with - 85 max words, 50 samples - at ../dataset/gen-word-85-count.jsonl\n"
      ]
     },
     {
@@ -447,175 +453,175 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 110 max words, 50 samples - at ../dataset/gen-word-110-count.jsonl\n"
+      "Generated a single JSONL file with 63 samples (1 token repeat) - 40 max words - at ../dataset/shuffle-word-40-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 40 max words, 50 samples - at ../dataset/gen-word-40-count.jsonl\n"
+      "Generated JSONL file with - 130 max words, 50 samples - at ../dataset/gen-word-130-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 55 max words, 50 samples - at ../dataset/gen-word-55-count.jsonl\n"
+      "Generated a single JSONL file with 177 samples (1 token repeat) - 15 max words - at ../dataset/shuffle-word-15-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 34 samples (1 token repeat) - 75 max words - at ../dataset/shuffle-word-75-count.jsonl\n"
+      "Generated a single JSONL file with 134 samples (1 token repeat) - 20 max words - at ../dataset/shuffle-word-20-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 25 max words, 50 samples - at ../dataset/gen-word-25-count.jsonl\n"
+      "Generated a single JSONL file with 77 samples (1 token repeat) - 35 max words - at ../dataset/shuffle-word-35-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 30 max words, 50 samples - at ../dataset/gen-word-30-count.jsonl\n"
+      "Generated a single JSONL file with 44 samples (1 token repeat) - 65 max words - at ../dataset/shuffle-word-65-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 58 samples (1 token repeat) - 45 max words - at ../dataset/shuffle-word-45-count.jsonl\n"
+      "Generated JSONL file with - 60 max words, 50 samples - at ../dataset/gen-word-60-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 134 samples (1 token repeat) - 20 max words - at ../dataset/shuffle-word-20-count.jsonl\n"
+      "Generated JSONL file with - 30 max words, 50 samples - at ../dataset/gen-word-30-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 45 max words, 50 samples - at ../dataset/gen-word-45-count.jsonl\n"
+      "Generated JSONL file with - 50 max words, 50 samples - at ../dataset/gen-word-50-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 50 max words, 50 samples - at ../dataset/gen-word-50-count.jsonl\n"
+      "Generated a single JSONL file with 28 samples (1 token repeat) - 95 max words - at ../dataset/shuffle-word-95-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 75 max words, 50 samples - at ../dataset/gen-word-75-count.jsonl\n"
+      "Generated JSONL file with - 90 max words, 50 samples - at ../dataset/gen-word-90-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 32 samples (1 token repeat) - 80 max words - at ../dataset/shuffle-word-80-count.jsonl\n"
+      "Generated JSONL file with - 70 max words, 50 samples - at ../dataset/gen-word-70-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 115 max words, 50 samples - at ../dataset/gen-word-115-count.jsonl\n"
+      "Generated a single JSONL file with 38 samples (1 token repeat) - 75 max words - at ../dataset/shuffle-word-75-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 105 max words, 50 samples - at ../dataset/gen-word-105-count.jsonl\n"
+      "Generated a single JSONL file with 17 samples (1 token repeat) - 125 max words - at ../dataset/shuffle-word-125-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 80 max words, 50 samples - at ../dataset/gen-word-80-count.jsonl\n"
+      "Generated JSONL file with - 75 max words, 50 samples - at ../dataset/gen-word-75-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 90 max words, 50 samples - at ../dataset/gen-word-90-count.jsonl\n"
+      "Generated JSONL file with - 80 max words, 50 samples - at ../dataset/gen-word-80-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (1 token repeat) - 105 max words - at ../dataset/shuffle-word-105-count.jsonl\n"
+      "Generated JSONL file with - 95 max words, 50 samples - at ../dataset/gen-word-95-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 15 samples (1 token repeat) - 165 max words - at ../dataset/shuffle-word-165-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (1 token repeat) - 110 max words - at ../dataset/shuffle-word-110-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 19 samples (1 token repeat) - 115 max words - at ../dataset/shuffle-word-115-count.jsonl\n"
+      "Generated a single JSONL file with 560 samples (1 token repeat) - 5 max words - at ../dataset/shuffle-word-5-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 29 samples (1 token repeat) - 90 max words - at ../dataset/shuffle-word-90-count.jsonl\n"
+      "Generated a single JSONL file with 19 samples (1 token repeat) - 115 max words - at ../dataset/shuffle-word-115-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 16 samples (1 token repeat) - 140 max words - at ../dataset/shuffle-word-140-count.jsonl\n"
+      "Generated a single JSONL file with 258 samples (1 token repeat) - 10 max words - at ../dataset/shuffle-word-10-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 130 max words, 50 samples - at ../dataset/gen-word-130-count.jsonl\n"
+      "Generated a single JSONL file with 17 samples (1 token repeat) - 130 max words - at ../dataset/shuffle-word-130-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 225 max words, 50 samples - at ../dataset/gen-word-225-count.jsonl\n"
+      "Generated JSONL file with - 105 max words, 50 samples - at ../dataset/gen-word-105-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 16 samples (1 token repeat) - 145 max words - at ../dataset/shuffle-word-145-count.jsonl\n"
+      "Generated JSONL file with - 120 max words, 50 samples - at ../dataset/gen-word-120-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 85 max words, 50 samples - at ../dataset/gen-word-85-count.jsonl\n"
+      "Generated JSONL file with - 110 max words, 50 samples - at ../dataset/gen-word-110-count.jsonl\n"
      ]
     },
     {
@@ -629,1554 +635,1554 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 27 samples (1 token repeat) - 95 max words - at ../dataset/shuffle-word-95-count.jsonl\n"
+      "Generated a single JSONL file with 32 samples (1 token repeat) - 90 max words - at ../dataset/shuffle-word-90-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 455 max words - at ../dataset/shuffle-word-455-count.jsonl\n"
+      "Generated a single JSONL file with 31 samples (1 token repeat) - 85 max words - at ../dataset/shuffle-word-85-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 785 max words - at ../dataset/shuffle-word-785-count.jsonl\n"
+      "Generated JSONL file with - 140 max words, 50 samples - at ../dataset/gen-word-140-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 685 max words - at ../dataset/shuffle-word-685-count.jsonl\n"
+      "Generated a single JSONL file with 16 samples (1 token repeat) - 150 max words - at ../dataset/shuffle-word-150-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 880 max words - at ../dataset/shuffle-word-880-count.jsonl\n"
+      "Generated a single JSONL file with 14 samples (1 token repeat) - 165 max words - at ../dataset/shuffle-word-165-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 14 samples (1 token repeat) - 180 max words - at ../dataset/shuffle-word-180-count.jsonl\n"
+      "Generated JSONL file with - 180 max words, 50 samples - at ../dataset/gen-word-180-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 740 max words - at ../dataset/shuffle-word-740-count.jsonl\n"
+      "Generated a single JSONL file with 16 samples (1 token repeat) - 145 max words - at ../dataset/shuffle-word-145-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 255 max words, 50 samples - at ../dataset/gen-word-255-count.jsonl\n"
+      "Generated a single JSONL file with 19 samples (1 token repeat) - 120 max words - at ../dataset/shuffle-word-120-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 365 max words - at ../dataset/shuffle-word-365-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (1 token repeat) - 105 max words - at ../dataset/shuffle-word-105-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 515 max words - at ../dataset/shuffle-word-515-count.jsonl\n"
+      "Generated a single JSONL file with 27 samples (1 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (1 token repeat) - 110 max words - at ../dataset/shuffle-word-110-count.jsonl\n"
+      "Generated JSONL file with - 135 max words, 50 samples - at ../dataset/gen-word-135-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 745 max words - at ../dataset/shuffle-word-745-count.jsonl\n"
+      "Generated JSONL file with - 115 max words, 50 samples - at ../dataset/gen-word-115-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 520 max words - at ../dataset/shuffle-word-520-count.jsonl\n"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 565 max words - at ../dataset/shuffle-word-565-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 695 max words - at ../dataset/shuffle-word-695-count.jsonl\n"
+      "Generated a single JSONL file with 17 samples (1 token repeat) - 135 max words - at ../dataset/shuffle-word-135-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 570 max words, 50 samples - at ../dataset/gen-word-570-count.jsonl\n"
+      "Generated JSONL file with - 165 max words, 50 samples - at ../dataset/gen-word-165-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 690 max words - at ../dataset/shuffle-word-690-count.jsonl\n"
+      "Generated a single JSONL file with 11 samples (1 token repeat) - 205 max words - at ../dataset/shuffle-word-205-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 350 max words - at ../dataset/shuffle-word-350-count.jsonl\n"
+      "Generated JSONL file with - 145 max words, 50 samples - at ../dataset/gen-word-145-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 785 max words, 50 samples - at ../dataset/gen-word-785-count.jsonl\n"
+      "Generated JSONL file with - 305 max words, 50 samples - at ../dataset/gen-word-305-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 310 max words - at ../dataset/shuffle-word-310-count.jsonl\n"
+      "Generated JSONL file with - 445 max words, 50 samples - at ../dataset/gen-word-445-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 760 max words, 50 samples - at ../dataset/gen-word-760-count.jsonl\n"
+      "Generated a single JSONL file with 15 samples (1 token repeat) - 155 max words - at ../dataset/shuffle-word-155-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 18 samples (1 token repeat) - 120 max words - at ../dataset/shuffle-word-120-count.jsonl\n"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 585 max words - at ../dataset/shuffle-word-585-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 820 max words, 50 samples - at ../dataset/gen-word-820-count.jsonl\n"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 570 max words - at ../dataset/shuffle-word-570-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 800 max words, 50 samples - at ../dataset/gen-word-800-count.jsonl\n"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 520 max words - at ../dataset/shuffle-word-520-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 765 max words, 50 samples - at ../dataset/gen-word-765-count.jsonl\n"
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 255 max words - at ../dataset/shuffle-word-255-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 865 max words - at ../dataset/shuffle-word-865-count.jsonl\n"
+      "Generated JSONL file with - 150 max words, 50 samples - at ../dataset/gen-word-150-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 475 max words, 50 samples - at ../dataset/gen-word-475-count.jsonl\n"
+      "Generated JSONL file with - 155 max words, 50 samples - at ../dataset/gen-word-155-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 750 max words, 50 samples - at ../dataset/gen-word-750-count.jsonl\n"
+      "Generated a single JSONL file with 14 samples (1 token repeat) - 175 max words - at ../dataset/shuffle-word-175-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 17 samples (1 token repeat) - 125 max words - at ../dataset/shuffle-word-125-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 625 max words - at ../dataset/shuffle-word-625-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 14 samples (1 token repeat) - 175 max words - at ../dataset/shuffle-word-175-count.jsonl\n"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 380 max words - at ../dataset/shuffle-word-380-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 155 max words, 50 samples - at ../dataset/gen-word-155-count.jsonl\n"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 470 max words - at ../dataset/shuffle-word-470-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 125 max words, 50 samples - at ../dataset/gen-word-125-count.jsonl\n"
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 280 max words - at ../dataset/shuffle-word-280-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 835 max words, 50 samples - at ../dataset/gen-word-835-count.jsonl\n"
+      "Generated a single JSONL file with 15 samples (1 token repeat) - 170 max words - at ../dataset/shuffle-word-170-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 640 max words - at ../dataset/shuffle-word-640-count.jsonl\n"
+      "Generated a single JSONL file with 15 samples (1 token repeat) - 160 max words - at ../dataset/shuffle-word-160-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 165 max words, 50 samples - at ../dataset/gen-word-165-count.jsonl\n"
+      "Generated JSONL file with - 535 max words, 50 samples - at ../dataset/gen-word-535-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 530 max words - at ../dataset/shuffle-word-530-count.jsonl\n"
+      "Generated JSONL file with - 745 max words, 50 samples - at ../dataset/gen-word-745-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 875 max words - at ../dataset/shuffle-word-875-count.jsonl\n"
+      "Generated JSONL file with - 490 max words, 50 samples - at ../dataset/gen-word-490-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 500 max words - at ../dataset/shuffle-word-500-count.jsonl\n"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 535 max words - at ../dataset/shuffle-word-535-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 120 max words, 50 samples - at ../dataset/gen-word-120-count.jsonl\n"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 405 max words - at ../dataset/shuffle-word-405-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 700 max words - at ../dataset/shuffle-word-700-count.jsonl\n"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 475 max words - at ../dataset/shuffle-word-475-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 635 max words - at ../dataset/shuffle-word-635-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 920 max words - at ../dataset/shuffle-word-920-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 14 samples (1 token repeat) - 185 max words - at ../dataset/shuffle-word-185-count.jsonl\n"
+      "Generated JSONL file with - 260 max words, 50 samples - at ../dataset/gen-word-260-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 11 samples (1 token repeat) - 205 max words - at ../dataset/shuffle-word-205-count.jsonl\n"
+      "Generated JSONL file with - 405 max words, 50 samples - at ../dataset/gen-word-405-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 385 max words, 50 samples - at ../dataset/gen-word-385-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 690 max words - at ../dataset/shuffle-word-690-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 580 max words, 50 samples - at ../dataset/gen-word-580-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 770 max words - at ../dataset/shuffle-word-770-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 27 samples (1 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 455 max words - at ../dataset/shuffle-word-455-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 10 samples (1 token repeat) - 210 max words - at ../dataset/shuffle-word-210-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 950 max words - at ../dataset/shuffle-word-950-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 360 max words - at ../dataset/shuffle-word-360-count.jsonl\n"
+      "Generated JSONL file with - 240 max words, 50 samples - at ../dataset/gen-word-240-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 265 max words - at ../dataset/shuffle-word-265-count.jsonl\n"
+      "Generated JSONL file with - 555 max words, 50 samples - at ../dataset/gen-word-555-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 840 max words - at ../dataset/shuffle-word-840-count.jsonl\n"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 365 max words - at ../dataset/shuffle-word-365-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 560 max words - at ../dataset/shuffle-word-560-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 820 max words - at ../dataset/shuffle-word-820-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 170 max words, 50 samples - at ../dataset/gen-word-170-count.jsonl\n"
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 215 max words - at ../dataset/shuffle-word-215-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 505 max words, 50 samples - at ../dataset/gen-word-505-count.jsonl\n"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 555 max words - at ../dataset/shuffle-word-555-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 860 max words - at ../dataset/shuffle-word-860-count.jsonl\n"
+      "Generated JSONL file with - 215 max words, 50 samples - at ../dataset/gen-word-215-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 415 max words - at ../dataset/shuffle-word-415-count.jsonl\n"
+      "Generated JSONL file with - 175 max words, 50 samples - at ../dataset/gen-word-175-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 655 max words, 50 samples - at ../dataset/gen-word-655-count.jsonl\n"
+      "Generated JSONL file with - 480 max words, 50 samples - at ../dataset/gen-word-480-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 10 samples (1 token repeat) - 300 max words - at ../dataset/shuffle-word-300-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 730 max words - at ../dataset/shuffle-word-730-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 70 max words, 50 samples - at ../dataset/gen-word-70-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3700 max words - at ../dataset/shuffle-word-3700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 680 max words - at ../dataset/shuffle-word-680-count.jsonl\n"
+      "Generated JSONL file with - 340 max words, 50 samples - at ../dataset/gen-word-340-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 845 max words, 50 samples - at ../dataset/gen-word-845-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 710 max words - at ../dataset/shuffle-word-710-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 790 max words, 50 samples - at ../dataset/gen-word-790-count.jsonl\n"
+      "Generated JSONL file with - 890 max words, 50 samples - at ../dataset/gen-word-890-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 210 max words, 50 samples - at ../dataset/gen-word-210-count.jsonl\n"
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 250 max words - at ../dataset/shuffle-word-250-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 185 max words, 50 samples - at ../dataset/gen-word-185-count.jsonl\n"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 370 max words - at ../dataset/shuffle-word-370-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 750 max words - at ../dataset/shuffle-word-750-count.jsonl\n"
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 295 max words - at ../dataset/shuffle-word-295-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 420 max words, 50 samples - at ../dataset/gen-word-420-count.jsonl\n"
+      "Generated JSONL file with - 230 max words, 50 samples - at ../dataset/gen-word-230-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 850 max words, 50 samples - at ../dataset/gen-word-850-count.jsonl\n"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 430 max words - at ../dataset/shuffle-word-430-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 365 max words, 50 samples - at ../dataset/gen-word-365-count.jsonl\n"
+      "Generated JSONL file with - 695 max words, 50 samples - at ../dataset/gen-word-695-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 840 max words, 50 samples - at ../dataset/gen-word-840-count.jsonl\n"
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 270 max words - at ../dataset/shuffle-word-270-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 675 max words, 50 samples - at ../dataset/gen-word-675-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 865 max words - at ../dataset/shuffle-word-865-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 580 max words - at ../dataset/shuffle-word-580-count.jsonl\n"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 560 max words - at ../dataset/shuffle-word-560-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 810 max words, 50 samples - at ../dataset/gen-word-810-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 665 max words - at ../dataset/shuffle-word-665-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 150 max words, 50 samples - at ../dataset/gen-word-150-count.jsonl\n"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 550 max words - at ../dataset/shuffle-word-550-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 705 max words - at ../dataset/shuffle-word-705-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 735 max words - at ../dataset/shuffle-word-735-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 545 max words, 50 samples - at ../dataset/gen-word-545-count.jsonl\n"
+      "Generated JSONL file with - 645 max words, 50 samples - at ../dataset/gen-word-645-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 445 max words, 50 samples - at ../dataset/gen-word-445-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 755 max words - at ../dataset/shuffle-word-755-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 715 max words - at ../dataset/shuffle-word-715-count.jsonl\n"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 485 max words - at ../dataset/shuffle-word-485-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 315 max words, 50 samples - at ../dataset/gen-word-315-count.jsonl\n"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 440 max words - at ../dataset/shuffle-word-440-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 600 max words, 50 samples - at ../dataset/gen-word-600-count.jsonl\n"
+      "Generated JSONL file with - 255 max words, 50 samples - at ../dataset/gen-word-255-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 805 max words, 50 samples - at ../dataset/gen-word-805-count.jsonl\n"
+      "Generated JSONL file with - 610 max words, 50 samples - at ../dataset/gen-word-610-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 160 max words, 50 samples - at ../dataset/gen-word-160-count.jsonl\n"
+      "Generated JSONL file with - 860 max words, 50 samples - at ../dataset/gen-word-860-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 865 max words, 50 samples - at ../dataset/gen-word-865-count.jsonl\n"
+      "Generated JSONL file with - 320 max words, 50 samples - at ../dataset/gen-word-320-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 435 max words, 50 samples - at ../dataset/gen-word-435-count.jsonl\n"
+      "Generated JSONL file with - 390 max words, 50 samples - at ../dataset/gen-word-390-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 830 max words, 50 samples - at ../dataset/gen-word-830-count.jsonl\n"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 415 max words - at ../dataset/shuffle-word-415-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 775 max words, 50 samples - at ../dataset/gen-word-775-count.jsonl\n"
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 260 max words - at ../dataset/shuffle-word-260-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 405 max words - at ../dataset/shuffle-word-405-count.jsonl\n"
+      "Generated JSONL file with - 925 max words, 50 samples - at ../dataset/gen-word-925-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 650 max words, 50 samples - at ../dataset/gen-word-650-count.jsonl\n"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 345 max words - at ../dataset/shuffle-word-345-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 525 max words, 50 samples - at ../dataset/gen-word-525-count.jsonl\n"
+      "Generated JSONL file with - 675 max words, 50 samples - at ../dataset/gen-word-675-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 965 max words, 50 samples - at ../dataset/gen-word-965-count.jsonl\n"
+      "Generated JSONL file with - 915 max words, 50 samples - at ../dataset/gen-word-915-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 890 max words - at ../dataset/shuffle-word-890-count.jsonl\n"
+      "Generated JSONL file with - 685 max words, 50 samples - at ../dataset/gen-word-685-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 35 samples (1 token repeat) - 70 max words - at ../dataset/shuffle-word-70-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 765 max words - at ../dataset/shuffle-word-765-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 13 samples (1 token repeat) - 170 max words - at ../dataset/shuffle-word-170-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 660 max words - at ../dataset/shuffle-word-660-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 970 max words, 50 samples - at ../dataset/gen-word-970-count.jsonl\n"
+      "Generated a single JSONL file with 11 samples (1 token repeat) - 210 max words - at ../dataset/shuffle-word-210-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 345 max words - at ../dataset/shuffle-word-345-count.jsonl\n"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 530 max words - at ../dataset/shuffle-word-530-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 10 samples (1 token repeat) - 220 max words - at ../dataset/shuffle-word-220-count.jsonl\n"
+      "Generated JSONL file with - 235 max words, 50 samples - at ../dataset/gen-word-235-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 15 samples (1 token repeat) - 155 max words - at ../dataset/shuffle-word-155-count.jsonl\n"
+      "Generated JSONL file with - 955 max words, 50 samples - at ../dataset/gen-word-955-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 325 max words, 50 samples - at ../dataset/gen-word-325-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 650 max words - at ../dataset/shuffle-word-650-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 260 max words, 50 samples - at ../dataset/gen-word-260-count.jsonl\n"
+      "Generated JSONL file with - 265 max words, 50 samples - at ../dataset/gen-word-265-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 450 max words - at ../dataset/shuffle-word-450-count.jsonl\n"
+      "Generated a single JSONL file with 14 samples (1 token repeat) - 185 max words - at ../dataset/shuffle-word-185-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 16 samples (1 token repeat) - 150 max words - at ../dataset/shuffle-word-150-count.jsonl\n"
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 285 max words - at ../dataset/shuffle-word-285-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 260 max words - at ../dataset/shuffle-word-260-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 745 max words - at ../dataset/shuffle-word-745-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 980 max words - at ../dataset/shuffle-word-980-count.jsonl\n"
+      "Generated JSONL file with - 940 max words, 50 samples - at ../dataset/gen-word-940-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 435 max words - at ../dataset/shuffle-word-435-count.jsonl\n"
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1500 max words - at ../dataset/shuffle-word-1500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 290 max words - at ../dataset/shuffle-word-290-count.jsonl\n"
+      "Generated JSONL file with - 485 max words, 50 samples - at ../dataset/gen-word-485-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 425 max words - at ../dataset/shuffle-word-425-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 615 max words - at ../dataset/shuffle-word-615-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 275 max words - at ../dataset/shuffle-word-275-count.jsonl\n"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 410 max words - at ../dataset/shuffle-word-410-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3200 max words - at ../dataset/shuffle-word-3200-count.jsonl\n"
+      "Generated JSONL file with - 500 max words, 50 samples - at ../dataset/gen-word-500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 380 max words - at ../dataset/shuffle-word-380-count.jsonl\n"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 375 max words - at ../dataset/shuffle-word-375-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3700 max words - at ../dataset/shuffle-word-3700-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 880 max words - at ../dataset/shuffle-word-880-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3600 max words - at ../dataset/shuffle-word-3600-count.jsonl\n"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 490 max words - at ../dataset/shuffle-word-490-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 250 max words, 50 samples - at ../dataset/gen-word-250-count.jsonl\n"
+      "Generated JSONL file with - 680 max words, 50 samples - at ../dataset/gen-word-680-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 10 samples (1 token repeat) - 295 max words - at ../dataset/shuffle-word-295-count.jsonl\n"
+      "Generated JSONL file with - 335 max words, 50 samples - at ../dataset/gen-word-335-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 855 max words - at ../dataset/shuffle-word-855-count.jsonl\n"
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 225 max words - at ../dataset/shuffle-word-225-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 285 max words - at ../dataset/shuffle-word-285-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 750 max words - at ../dataset/shuffle-word-750-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 295 max words, 50 samples - at ../dataset/gen-word-295-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 815 max words - at ../dataset/shuffle-word-815-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 140 max words, 50 samples - at ../dataset/gen-word-140-count.jsonl\n"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 595 max words - at ../dataset/shuffle-word-595-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 235 max words, 50 samples - at ../dataset/gen-word-235-count.jsonl\n"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 315 max words - at ../dataset/shuffle-word-315-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 34 samples (1 token repeat) - 85 max words - at ../dataset/shuffle-word-85-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 605 max words - at ../dataset/shuffle-word-605-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 14 samples (1 token repeat) - 200 max words - at ../dataset/shuffle-word-200-count.jsonl\n"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 330 max words - at ../dataset/shuffle-word-330-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 175 max words, 50 samples - at ../dataset/gen-word-175-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 795 max words - at ../dataset/shuffle-word-795-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6700 max words - at ../dataset/shuffle-word-6700-count.jsonl\n"
+      "Generated JSONL file with - 615 max words, 50 samples - at ../dataset/gen-word-615-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 945 max words - at ../dataset/shuffle-word-945-count.jsonl\n"
+      "Generated JSONL file with - 565 max words, 50 samples - at ../dataset/gen-word-565-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7200 max words - at ../dataset/shuffle-word-7200-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 935 max words - at ../dataset/shuffle-word-935-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 10 samples (1 token repeat) - 270 max words - at ../dataset/shuffle-word-270-count.jsonl\n"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 355 max words - at ../dataset/shuffle-word-355-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 275 max words, 50 samples - at ../dataset/gen-word-275-count.jsonl\n"
+      "Generated JSONL file with - 345 max words, 50 samples - at ../dataset/gen-word-345-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 10 samples (1 token repeat) - 225 max words - at ../dataset/shuffle-word-225-count.jsonl\n"
+      "Generated JSONL file with - 280 max words, 50 samples - at ../dataset/gen-word-280-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 400 max words - at ../dataset/shuffle-word-400-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 995 max words - at ../dataset/shuffle-word-995-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 410 max words - at ../dataset/shuffle-word-410-count.jsonl\n"
+      "Generated a single JSONL file with 14 samples (1 token repeat) - 180 max words - at ../dataset/shuffle-word-180-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 920 max words, 50 samples - at ../dataset/gen-word-920-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 785 max words - at ../dataset/shuffle-word-785-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 935 max words, 50 samples - at ../dataset/gen-word-935-count.jsonl\n"
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 220 max words - at ../dataset/shuffle-word-220-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 270 max words, 50 samples - at ../dataset/gen-word-270-count.jsonl\n"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 425 max words - at ../dataset/shuffle-word-425-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 780 max words, 50 samples - at ../dataset/gen-word-780-count.jsonl\n"
+      "Generated JSONL file with - 470 max words, 50 samples - at ../dataset/gen-word-470-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 335 max words, 50 samples - at ../dataset/gen-word-335-count.jsonl\n"
+      "Generated JSONL file with - 905 max words, 50 samples - at ../dataset/gen-word-905-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 370 max words - at ../dataset/shuffle-word-370-count.jsonl\n"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 580 max words - at ../dataset/shuffle-word-580-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 445 max words - at ../dataset/shuffle-word-445-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 845 max words - at ../dataset/shuffle-word-845-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 14 samples (1 token repeat) - 190 max words - at ../dataset/shuffle-word-190-count.jsonl\n"
+      "Generated JSONL file with - 450 max words, 50 samples - at ../dataset/gen-word-450-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5500 max words - at ../dataset/shuffle-word-5500-count.jsonl\n"
+      "Generated a single JSONL file with 14 samples (1 token repeat) - 190 max words - at ../dataset/shuffle-word-190-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3000 max words - at ../dataset/shuffle-word-3000-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5100 max words - at ../dataset/shuffle-word-5100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 300 max words, 50 samples - at ../dataset/gen-word-300-count.jsonl\n"
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 240 max words - at ../dataset/shuffle-word-240-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 60 samples (20 token repeat) - 1200 max words - at ../dataset/shuffle-word-1200-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 885 max words - at ../dataset/shuffle-word-885-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 390 max words - at ../dataset/shuffle-word-390-count.jsonl\n"
+      "Generated JSONL file with - 325 max words, 50 samples - at ../dataset/gen-word-325-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 180 max words, 50 samples - at ../dataset/gen-word-180-count.jsonl\n"
+      "Generated JSONL file with - 900 max words, 50 samples - at ../dataset/gen-word-900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5200 max words - at ../dataset/shuffle-word-5200-count.jsonl\n"
+      "Generated JSONL file with - 945 max words, 50 samples - at ../dataset/gen-word-945-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 765 max words - at ../dataset/shuffle-word-765-count.jsonl\n"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 445 max words - at ../dataset/shuffle-word-445-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 940 max words - at ../dataset/shuffle-word-940-count.jsonl\n"
+      "Generated JSONL file with - 950 max words, 50 samples - at ../dataset/gen-word-950-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 530 max words, 50 samples - at ../dataset/gen-word-530-count.jsonl\n"
+      "Generated JSONL file with - 330 max words, 50 samples - at ../dataset/gen-word-330-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 245 max words, 50 samples - at ../dataset/gen-word-245-count.jsonl\n"
+      "Generated JSONL file with - 960 max words, 50 samples - at ../dataset/gen-word-960-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 245 max words - at ../dataset/shuffle-word-245-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 955 max words - at ../dataset/shuffle-word-955-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 17 samples (1 token repeat) - 135 max words - at ../dataset/shuffle-word-135-count.jsonl\n"
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 245 max words - at ../dataset/shuffle-word-245-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 240 max words - at ../dataset/shuffle-word-240-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 800 max words - at ../dataset/shuffle-word-800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 355 max words - at ../dataset/shuffle-word-355-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 700 max words - at ../dataset/shuffle-word-700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 375 max words, 50 samples - at ../dataset/gen-word-375-count.jsonl\n"
+      "Generated JSONL file with - 295 max words, 50 samples - at ../dataset/gen-word-295-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 8000 max words - at ../dataset/shuffle-word-8000-count.jsonl\n"
+      "Generated a single JSONL file with 13 samples (1 token repeat) - 195 max words - at ../dataset/shuffle-word-195-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 205 max words, 50 samples - at ../dataset/gen-word-205-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 810 max words - at ../dataset/shuffle-word-810-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 910 max words, 50 samples - at ../dataset/gen-word-910-count.jsonl\n"
+      "Generated JSONL file with - 195 max words, 50 samples - at ../dataset/gen-word-195-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 15 samples (1 token repeat) - 160 max words - at ../dataset/shuffle-word-160-count.jsonl\n"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 545 max words - at ../dataset/shuffle-word-545-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 465 max words, 50 samples - at ../dataset/gen-word-465-count.jsonl\n"
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1700 max words - at ../dataset/shuffle-word-1700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 190 max words, 50 samples - at ../dataset/gen-word-190-count.jsonl\n"
+      "Generated JSONL file with - 170 max words, 50 samples - at ../dataset/gen-word-170-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 315 max words - at ../dataset/shuffle-word-315-count.jsonl\n"
+      "Generated JSONL file with - 630 max words, 50 samples - at ../dataset/gen-word-630-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3400 max words - at ../dataset/shuffle-word-3400-count.jsonl\n"
+      "Generated JSONL file with - 790 max words, 50 samples - at ../dataset/gen-word-790-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6000 max words - at ../dataset/shuffle-word-6000-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 805 max words - at ../dataset/shuffle-word-805-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 430 max words - at ../dataset/shuffle-word-430-count.jsonl\n"
+      "Generated JSONL file with - 380 max words, 50 samples - at ../dataset/gen-word-380-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 305 max words - at ../dataset/shuffle-word-305-count.jsonl\n"
+      "Generated JSONL file with - 895 max words, 50 samples - at ../dataset/gen-word-895-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 390 max words, 50 samples - at ../dataset/gen-word-390-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 940 max words - at ../dataset/shuffle-word-940-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6300 max words - at ../dataset/shuffle-word-6300-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 875 max words - at ../dataset/shuffle-word-875-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6500 max words - at ../dataset/shuffle-word-6500-count.jsonl\n"
+      "Generated a single JSONL file with 51 samples (20 token repeat) - 1300 max words - at ../dataset/shuffle-word-1300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 330 max words, 50 samples - at ../dataset/gen-word-330-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 1000 max words - at ../dataset/shuffle-word-1000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 10 samples (1 token repeat) - 230 max words - at ../dataset/shuffle-word-230-count.jsonl\n"
+      "Generated JSONL file with - 245 max words, 50 samples - at ../dataset/gen-word-245-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 495 max words - at ../dataset/shuffle-word-495-count.jsonl\n"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 325 max words - at ../dataset/shuffle-word-325-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 680 max words, 50 samples - at ../dataset/gen-word-680-count.jsonl\n"
+      "Generated JSONL file with - 375 max words, 50 samples - at ../dataset/gen-word-375-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 10 samples (1 token repeat) - 215 max words - at ../dataset/shuffle-word-215-count.jsonl\n"
+      "Generated JSONL file with - 650 max words, 50 samples - at ../dataset/gen-word-650-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 14 samples (1 token repeat) - 195 max words - at ../dataset/shuffle-word-195-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 905 max words - at ../dataset/shuffle-word-905-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 325 max words - at ../dataset/shuffle-word-325-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 760 max words - at ../dataset/shuffle-word-760-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 340 max words - at ../dataset/shuffle-word-340-count.jsonl\n"
+      "Generated JSONL file with - 455 max words, 50 samples - at ../dataset/gen-word-455-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 200 max words, 50 samples - at ../dataset/gen-word-200-count.jsonl\n"
+      "Generated JSONL file with - 850 max words, 50 samples - at ../dataset/gen-word-850-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 455 max words, 50 samples - at ../dataset/gen-word-455-count.jsonl\n"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 360 max words - at ../dataset/shuffle-word-360-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 2300 max words - at ../dataset/shuffle-word-2300-count.jsonl\n"
+      "Generated JSONL file with - 530 max words, 50 samples - at ../dataset/gen-word-530-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1700 max words - at ../dataset/shuffle-word-1700-count.jsonl\n"
+      "Generated JSONL file with - 440 max words, 50 samples - at ../dataset/gen-word-440-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 565 max words - at ../dataset/shuffle-word-565-count.jsonl\n"
+      "Generated JSONL file with - 510 max words, 50 samples - at ../dataset/gen-word-510-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 2900 max words - at ../dataset/shuffle-word-2900-count.jsonl\n"
+      "Generated JSONL file with - 1000 max words, 50 samples - at ../dataset/gen-word-1000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 710 max words - at ../dataset/shuffle-word-710-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 775 max words - at ../dataset/shuffle-word-775-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4200 max words - at ../dataset/shuffle-word-4200-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 970 max words - at ../dataset/shuffle-word-970-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 570 max words - at ../dataset/shuffle-word-570-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3600 max words - at ../dataset/shuffle-word-3600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 440 max words, 50 samples - at ../dataset/gen-word-440-count.jsonl\n"
+      "Generated JSONL file with - 225 max words, 50 samples - at ../dataset/gen-word-225-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 480 max words - at ../dataset/shuffle-word-480-count.jsonl\n"
+      "Generated JSONL file with - 805 max words, 50 samples - at ../dataset/gen-word-805-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 21 samples (20 token repeat) - 2800 max words - at ../dataset/shuffle-word-2800-count.jsonl\n"
+      "Generated JSONL file with - 460 max words, 50 samples - at ../dataset/gen-word-460-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 265 max words, 50 samples - at ../dataset/gen-word-265-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 685 max words - at ../dataset/shuffle-word-685-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 625 max words - at ../dataset/shuffle-word-625-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 645 max words - at ../dataset/shuffle-word-645-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 17 samples (1 token repeat) - 130 max words - at ../dataset/shuffle-word-130-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4400 max words - at ../dataset/shuffle-word-4400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7800 max words - at ../dataset/shuffle-word-7800-count.jsonl\n"
+      "Generated a single JSONL file with 15 samples (1 token repeat) - 200 max words - at ../dataset/shuffle-word-200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 320 max words, 50 samples - at ../dataset/gen-word-320-count.jsonl\n"
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 230 max words - at ../dataset/shuffle-word-230-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 425 max words, 50 samples - at ../dataset/gen-word-425-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 900 max words - at ../dataset/shuffle-word-900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 135 max words, 50 samples - at ../dataset/gen-word-135-count.jsonl\n"
+      "Generated JSONL file with - 835 max words, 50 samples - at ../dataset/gen-word-835-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 725 max words - at ../dataset/shuffle-word-725-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 705 max words - at ../dataset/shuffle-word-705-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 330 max words - at ../dataset/shuffle-word-330-count.jsonl\n"
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 290 max words - at ../dataset/shuffle-word-290-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6600 max words - at ../dataset/shuffle-word-6600-count.jsonl\n"
+      "Generated JSONL file with - 315 max words, 50 samples - at ../dataset/gen-word-315-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 630 max words - at ../dataset/shuffle-word-630-count.jsonl\n"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 500 max words - at ../dataset/shuffle-word-500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5000 max words - at ../dataset/shuffle-word-5000-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 840 max words - at ../dataset/shuffle-word-840-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3800 max words - at ../dataset/shuffle-word-3800-count.jsonl\n"
+      "Generated JSONL file with - 360 max words, 50 samples - at ../dataset/gen-word-360-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7100 max words - at ../dataset/shuffle-word-7100-count.jsonl\n"
+      "Generated JSONL file with - 760 max words, 50 samples - at ../dataset/gen-word-760-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 470 max words - at ../dataset/shuffle-word-470-count.jsonl\n"
+      "Generated JSONL file with - 910 max words, 50 samples - at ../dataset/gen-word-910-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 905 max words, 50 samples - at ../dataset/gen-word-905-count.jsonl\n"
+      "Generated JSONL file with - 580 max words, 50 samples - at ../dataset/gen-word-580-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 380 max words, 50 samples - at ../dataset/gen-word-380-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2900 max words - at ../dataset/shuffle-word-2900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 605 max words, 50 samples - at ../dataset/gen-word-605-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 990 max words - at ../dataset/shuffle-word-990-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7300 max words - at ../dataset/shuffle-word-7300-count.jsonl\n"
+      "Generated JSONL file with - 795 max words, 50 samples - at ../dataset/gen-word-795-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 370 max words, 50 samples - at ../dataset/gen-word-370-count.jsonl\n"
+      "Generated JSONL file with - 570 max words, 50 samples - at ../dataset/gen-word-570-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 10 samples (1 token repeat) - 235 max words - at ../dataset/shuffle-word-235-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 895 max words - at ../dataset/shuffle-word-895-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 575 max words, 50 samples - at ../dataset/gen-word-575-count.jsonl\n"
+      "Generated JSONL file with - 935 max words, 50 samples - at ../dataset/gen-word-935-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 305 max words, 50 samples - at ../dataset/gen-word-305-count.jsonl\n"
+      "Generated a single JSONL file with 8 samples (1 token repeat) - 390 max words - at ../dataset/shuffle-word-390-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3500 max words - at ../dataset/shuffle-word-3500-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 620 max words - at ../dataset/shuffle-word-620-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 495 max words, 50 samples - at ../dataset/gen-word-495-count.jsonl\n"
+      "Generated JSONL file with - 845 max words, 50 samples - at ../dataset/gen-word-845-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 710 max words, 50 samples - at ../dataset/gen-word-710-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7600 max words - at ../dataset/shuffle-word-7600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 480 max words, 50 samples - at ../dataset/gen-word-480-count.jsonl\n"
+      "Generated JSONL file with - 735 max words, 50 samples - at ../dataset/gen-word-735-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 610 max words, 50 samples - at ../dataset/gen-word-610-count.jsonl\n"
+      "Generated JSONL file with - 655 max words, 50 samples - at ../dataset/gen-word-655-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 830 max words - at ../dataset/shuffle-word-830-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4100 max words - at ../dataset/shuffle-word-4100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 645 max words - at ../dataset/shuffle-word-645-count.jsonl\n"
+      "Generated JSONL file with - 715 max words, 50 samples - at ../dataset/gen-word-715-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 770 max words - at ../dataset/shuffle-word-770-count.jsonl\n"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 480 max words - at ../dataset/shuffle-word-480-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 735 max words - at ../dataset/shuffle-word-735-count.jsonl\n"
+      "Generated JSONL file with - 365 max words, 50 samples - at ../dataset/gen-word-365-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 920 max words - at ../dataset/shuffle-word-920-count.jsonl\n"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 460 max words - at ../dataset/shuffle-word-460-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 930 max words - at ../dataset/shuffle-word-930-count.jsonl\n"
+      "Generated JSONL file with - 505 max words, 50 samples - at ../dataset/gen-word-505-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 285 max words, 50 samples - at ../dataset/gen-word-285-count.jsonl\n"
+      "Generated JSONL file with - 970 max words, 50 samples - at ../dataset/gen-word-970-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 560 max words, 50 samples - at ../dataset/gen-word-560-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 610 max words - at ../dataset/shuffle-word-610-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 815 max words - at ../dataset/shuffle-word-815-count.jsonl\n"
+      "Generated JSONL file with - 575 max words, 50 samples - at ../dataset/gen-word-575-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 395 max words - at ../dataset/shuffle-word-395-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 740 max words - at ../dataset/shuffle-word-740-count.jsonl\n"
      ]
     },
     {
@@ -2190,1442 +2196,1456 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 645 max words, 50 samples - at ../dataset/gen-word-645-count.jsonl\n"
+      "Generated JSONL file with - 885 max words, 50 samples - at ../dataset/gen-word-885-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4700 max words - at ../dataset/shuffle-word-4700-count.jsonl\n"
+      "Generated JSONL file with - 690 max words, 50 samples - at ../dataset/gen-word-690-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 280 max words, 50 samples - at ../dataset/gen-word-280-count.jsonl\n"
+      "Generated JSONL file with - 585 max words, 50 samples - at ../dataset/gen-word-585-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 605 max words - at ../dataset/shuffle-word-605-count.jsonl\n"
+      "Generated JSONL file with - 465 max words, 50 samples - at ../dataset/gen-word-465-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 755 max words - at ../dataset/shuffle-word-755-count.jsonl\n"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 590 max words - at ../dataset/shuffle-word-590-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 350 max words, 50 samples - at ../dataset/gen-word-350-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3800 max words - at ../dataset/shuffle-word-3800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 990 max words - at ../dataset/shuffle-word-990-count.jsonl\n"
+      "Generated JSONL file with - 550 max words, 50 samples - at ../dataset/gen-word-550-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 610 max words - at ../dataset/shuffle-word-610-count.jsonl\n"
+      "Generated JSONL file with - 980 max words, 50 samples - at ../dataset/gen-word-980-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 240 max words, 50 samples - at ../dataset/gen-word-240-count.jsonl\n"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 600 max words - at ../dataset/shuffle-word-600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 985 max words - at ../dataset/shuffle-word-985-count.jsonl\n"
+      "Generated JSONL file with - 200 max words, 50 samples - at ../dataset/gen-word-200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 535 max words, 50 samples - at ../dataset/gen-word-535-count.jsonl\n"
+      "Generated JSONL file with - 870 max words, 50 samples - at ../dataset/gen-word-870-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 945 max words, 50 samples - at ../dataset/gen-word-945-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5000 max words - at ../dataset/shuffle-word-5000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 525 max words - at ../dataset/shuffle-word-525-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 835 max words - at ../dataset/shuffle-word-835-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 620 max words, 50 samples - at ../dataset/gen-word-620-count.jsonl\n"
+      "Generated a single JSONL file with 8 samples (1 token repeat) - 310 max words - at ../dataset/shuffle-word-310-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 915 max words - at ../dataset/shuffle-word-915-count.jsonl\n"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 340 max words - at ../dataset/shuffle-word-340-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 500 max words, 50 samples - at ../dataset/gen-word-500-count.jsonl\n"
+      "Generated JSONL file with - 475 max words, 50 samples - at ../dataset/gen-word-475-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 250 max words - at ../dataset/shuffle-word-250-count.jsonl\n"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 540 max words - at ../dataset/shuffle-word-540-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7400 max words - at ../dataset/shuffle-word-7400-count.jsonl\n"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 395 max words - at ../dataset/shuffle-word-395-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 770 max words, 50 samples - at ../dataset/gen-word-770-count.jsonl\n"
+      "Generated JSONL file with - 520 max words, 50 samples - at ../dataset/gen-word-520-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 895 max words, 50 samples - at ../dataset/gen-word-895-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 985 max words - at ../dataset/shuffle-word-985-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 555 max words - at ../dataset/shuffle-word-555-count.jsonl\n"
+      "Generated JSONL file with - 740 max words, 50 samples - at ../dataset/gen-word-740-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 590 max words - at ../dataset/shuffle-word-590-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6900 max words - at ../dataset/shuffle-word-6900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 965 max words - at ../dataset/shuffle-word-965-count.jsonl\n"
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 235 max words - at ../dataset/shuffle-word-235-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 485 max words - at ../dataset/shuffle-word-485-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 980 max words - at ../dataset/shuffle-word-980-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1900 max words - at ../dataset/shuffle-word-1900-count.jsonl\n"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 505 max words - at ../dataset/shuffle-word-505-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 870 max words - at ../dataset/shuffle-word-870-count.jsonl\n"
+      "Generated JSONL file with - 830 max words, 50 samples - at ../dataset/gen-word-830-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4600 max words - at ../dataset/shuffle-word-4600-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 945 max words - at ../dataset/shuffle-word-945-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 1000 max words - at ../dataset/shuffle-word-1000-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4700 max words - at ../dataset/shuffle-word-4700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 415 max words, 50 samples - at ../dataset/gen-word-415-count.jsonl\n"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 400 max words - at ../dataset/shuffle-word-400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 470 max words, 50 samples - at ../dataset/gen-word-470-count.jsonl\n"
+      "Generated JSONL file with - 600 max words, 50 samples - at ../dataset/gen-word-600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 340 max words, 50 samples - at ../dataset/gen-word-340-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3200 max words - at ../dataset/shuffle-word-3200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 375 max words - at ../dataset/shuffle-word-375-count.jsonl\n"
+      "Generated JSONL file with - 620 max words, 50 samples - at ../dataset/gen-word-620-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5900 max words - at ../dataset/shuffle-word-5900-count.jsonl\n"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 385 max words - at ../dataset/shuffle-word-385-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 795 max words - at ../dataset/shuffle-word-795-count.jsonl\n"
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1800 max words - at ../dataset/shuffle-word-1800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 895 max words - at ../dataset/shuffle-word-895-count.jsonl\n"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 435 max words - at ../dataset/shuffle-word-435-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 950 max words - at ../dataset/shuffle-word-950-count.jsonl\n"
+      "Generated JSONL file with - 605 max words, 50 samples - at ../dataset/gen-word-605-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3100 max words - at ../dataset/shuffle-word-3100-count.jsonl\n"
+      "Generated JSONL file with - 930 max words, 50 samples - at ../dataset/gen-word-930-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 335 max words - at ../dataset/shuffle-word-335-count.jsonl\n"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 465 max words - at ../dataset/shuffle-word-465-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 670 max words - at ../dataset/shuffle-word-670-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5800 max words - at ../dataset/shuffle-word-5800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 385 max words - at ../dataset/shuffle-word-385-count.jsonl\n"
+      "Generated JSONL file with - 385 max words, 50 samples - at ../dataset/gen-word-385-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5100 max words - at ../dataset/shuffle-word-5100-count.jsonl\n"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 575 max words - at ../dataset/shuffle-word-575-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 195 max words, 50 samples - at ../dataset/gen-word-195-count.jsonl\n"
+      "Generated JSONL file with - 415 max words, 50 samples - at ../dataset/gen-word-415-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 880 max words, 50 samples - at ../dataset/gen-word-880-count.jsonl\n"
+      "Generated JSONL file with - 370 max words, 50 samples - at ../dataset/gen-word-370-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 660 max words - at ../dataset/shuffle-word-660-count.jsonl\n"
+      "Generated JSONL file with - 290 max words, 50 samples - at ../dataset/gen-word-290-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 780 max words - at ../dataset/shuffle-word-780-count.jsonl\n"
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1600 max words - at ../dataset/shuffle-word-1600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 825 max words - at ../dataset/shuffle-word-825-count.jsonl\n"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 510 max words - at ../dataset/shuffle-word-510-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 720 max words - at ../dataset/shuffle-word-720-count.jsonl\n"
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1200 max words - at ../dataset/shuffle-word-1200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 930 max words, 50 samples - at ../dataset/gen-word-930-count.jsonl\n"
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 300 max words - at ../dataset/shuffle-word-300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 705 max words, 50 samples - at ../dataset/gen-word-705-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 965 max words - at ../dataset/shuffle-word-965-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 905 max words - at ../dataset/shuffle-word-905-count.jsonl\n"
+      "Generated JSONL file with - 590 max words, 50 samples - at ../dataset/gen-word-590-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 290 max words, 50 samples - at ../dataset/gen-word-290-count.jsonl\n"
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2400 max words - at ../dataset/shuffle-word-2400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 815 max words, 50 samples - at ../dataset/gen-word-815-count.jsonl\n"
+      "Generated a single JSONL file with 17 samples (1 token repeat) - 140 max words - at ../dataset/shuffle-word-140-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 490 max words, 50 samples - at ../dataset/gen-word-490-count.jsonl\n"
+      "Generated JSONL file with - 430 max words, 50 samples - at ../dataset/gen-word-430-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 595 max words - at ../dataset/shuffle-word-595-count.jsonl\n"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 350 max words - at ../dataset/shuffle-word-350-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 440 max words - at ../dataset/shuffle-word-440-count.jsonl\n"
+      "Generated JSONL file with - 625 max words, 50 samples - at ../dataset/gen-word-625-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 665 max words - at ../dataset/shuffle-word-665-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3900 max words - at ../dataset/shuffle-word-3900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 465 max words - at ../dataset/shuffle-word-465-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 630 max words - at ../dataset/shuffle-word-630-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 650 max words - at ../dataset/shuffle-word-650-count.jsonl\n"
+      "Generated JSONL file with - 560 max words, 50 samples - at ../dataset/gen-word-560-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 655 max words - at ../dataset/shuffle-word-655-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 790 max words - at ../dataset/shuffle-word-790-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 615 max words - at ../dataset/shuffle-word-615-count.jsonl\n"
+      "Generated JSONL file with - 640 max words, 50 samples - at ../dataset/gen-word-640-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 280 max words - at ../dataset/shuffle-word-280-count.jsonl\n"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 320 max words - at ../dataset/shuffle-word-320-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 925 max words - at ../dataset/shuffle-word-925-count.jsonl\n"
+      "Generated JSONL file with - 875 max words, 50 samples - at ../dataset/gen-word-875-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 850 max words - at ../dataset/shuffle-word-850-count.jsonl\n"
+      "Generated JSONL file with - 810 max words, 50 samples - at ../dataset/gen-word-810-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 585 max words - at ../dataset/shuffle-word-585-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 910 max words - at ../dataset/shuffle-word-910-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 630 max words, 50 samples - at ../dataset/gen-word-630-count.jsonl\n"
+      "Generated JSONL file with - 880 max words, 50 samples - at ../dataset/gen-word-880-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 885 max words, 50 samples - at ../dataset/gen-word-885-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3100 max words - at ../dataset/shuffle-word-3100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 320 max words - at ../dataset/shuffle-word-320-count.jsonl\n"
+      "Generated JSONL file with - 220 max words, 50 samples - at ../dataset/gen-word-220-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 48 samples (20 token repeat) - 1300 max words - at ../dataset/shuffle-word-1300-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 915 max words - at ../dataset/shuffle-word-915-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 835 max words - at ../dataset/shuffle-word-835-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 825 max words - at ../dataset/shuffle-word-825-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 760 max words - at ../dataset/shuffle-word-760-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 720 max words - at ../dataset/shuffle-word-720-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 550 max words, 50 samples - at ../dataset/gen-word-550-count.jsonl\n"
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 265 max words - at ../dataset/shuffle-word-265-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 800 max words - at ../dataset/shuffle-word-800-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 890 max words - at ../dataset/shuffle-word-890-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 230 max words, 50 samples - at ../dataset/gen-word-230-count.jsonl\n"
+      "Generated JSONL file with - 300 max words, 50 samples - at ../dataset/gen-word-300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 875 max words, 50 samples - at ../dataset/gen-word-875-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3500 max words - at ../dataset/shuffle-word-3500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 510 max words, 50 samples - at ../dataset/gen-word-510-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 925 max words - at ../dataset/shuffle-word-925-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1000 max words, 50 samples - at ../dataset/gen-word-1000-count.jsonl\n"
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2100 max words - at ../dataset/shuffle-word-2100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 900 max words - at ../dataset/shuffle-word-900-count.jsonl\n"
+      "Generated JSONL file with - 285 max words, 50 samples - at ../dataset/gen-word-285-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 775 max words - at ../dataset/shuffle-word-775-count.jsonl\n"
+      "Generated a single JSONL file with 38 samples (20 token repeat) - 2500 max words - at ../dataset/shuffle-word-2500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6800 max words - at ../dataset/shuffle-word-6800-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 695 max words - at ../dataset/shuffle-word-695-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 860 max words, 50 samples - at ../dataset/gen-word-860-count.jsonl\n"
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2200 max words - at ../dataset/shuffle-word-2200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 940 max words, 50 samples - at ../dataset/gen-word-940-count.jsonl\n"
+      "Generated JSONL file with - 660 max words, 50 samples - at ../dataset/gen-word-660-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 995 max words - at ../dataset/shuffle-word-995-count.jsonl\n"
+      "Generated JSONL file with - 800 max words, 50 samples - at ../dataset/gen-word-800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 980 max words, 50 samples - at ../dataset/gen-word-980-count.jsonl\n"
+      "Generated JSONL file with - 990 max words, 50 samples - at ../dataset/gen-word-990-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 23 samples (20 token repeat) - 2700 max words - at ../dataset/shuffle-word-2700-count.jsonl\n"
+      "Generated JSONL file with - 595 max words, 50 samples - at ../dataset/gen-word-595-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 355 max words, 50 samples - at ../dataset/gen-word-355-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5600 max words - at ../dataset/shuffle-word-5600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 550 max words - at ../dataset/shuffle-word-550-count.jsonl\n"
+      "Generated JSONL file with - 210 max words, 50 samples - at ../dataset/gen-word-210-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 970 max words - at ../dataset/shuffle-word-970-count.jsonl\n"
+      "Generated JSONL file with - 425 max words, 50 samples - at ../dataset/gen-word-425-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 810 max words - at ../dataset/shuffle-word-810-count.jsonl\n"
+      "Generated JSONL file with - 765 max words, 50 samples - at ../dataset/gen-word-765-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 555 max words, 50 samples - at ../dataset/gen-word-555-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 680 max words - at ../dataset/shuffle-word-680-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 740 max words, 50 samples - at ../dataset/gen-word-740-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 960 max words - at ../dataset/shuffle-word-960-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 700 max words, 50 samples - at ../dataset/gen-word-700-count.jsonl\n"
+      "Generated JSONL file with - 670 max words, 50 samples - at ../dataset/gen-word-670-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 575 max words - at ../dataset/shuffle-word-575-count.jsonl\n"
+      "Generated JSONL file with - 825 max words, 50 samples - at ../dataset/gen-word-825-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 400 max words, 50 samples - at ../dataset/gen-word-400-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 975 max words - at ../dataset/shuffle-word-975-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 960 max words, 50 samples - at ../dataset/gen-word-960-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7100 max words - at ../dataset/shuffle-word-7100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 565 max words, 50 samples - at ../dataset/gen-word-565-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 860 max words - at ../dataset/shuffle-word-860-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 935 max words - at ../dataset/shuffle-word-935-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 715 max words - at ../dataset/shuffle-word-715-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 820 max words - at ../dataset/shuffle-word-820-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7500 max words - at ../dataset/shuffle-word-7500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4500 max words - at ../dataset/shuffle-word-4500-count.jsonl\n"
+      "Generated JSONL file with - 515 max words, 50 samples - at ../dataset/gen-word-515-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3300 max words - at ../dataset/shuffle-word-3300-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6200 max words - at ../dataset/shuffle-word-6200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 730 max words, 50 samples - at ../dataset/gen-word-730-count.jsonl\n"
+      "Generated JSONL file with - 185 max words, 50 samples - at ../dataset/gen-word-185-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6900 max words - at ../dataset/shuffle-word-6900-count.jsonl\n"
+      "Generated JSONL file with - 755 max words, 50 samples - at ../dataset/gen-word-755-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 915 max words, 50 samples - at ../dataset/gen-word-915-count.jsonl\n"
+      "Generated JSONL file with - 540 max words, 50 samples - at ../dataset/gen-word-540-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 900 max words, 50 samples - at ../dataset/gen-word-900-count.jsonl\n"
+      "Generated JSONL file with - 310 max words, 50 samples - at ../dataset/gen-word-310-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 745 max words, 50 samples - at ../dataset/gen-word-745-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5500 max words - at ../dataset/shuffle-word-5500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 690 max words, 50 samples - at ../dataset/gen-word-690-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 725 max words - at ../dataset/shuffle-word-725-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4900 max words - at ../dataset/shuffle-word-4900-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7000 max words - at ../dataset/shuffle-word-7000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 310 max words, 50 samples - at ../dataset/gen-word-310-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4500 max words - at ../dataset/shuffle-word-4500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 485 max words, 50 samples - at ../dataset/gen-word-485-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7400 max words - at ../dataset/shuffle-word-7400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 410 max words, 50 samples - at ../dataset/gen-word-410-count.jsonl\n"
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2000 max words - at ../dataset/shuffle-word-2000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 885 max words - at ../dataset/shuffle-word-885-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 640 max words - at ../dataset/shuffle-word-640-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 540 max words, 50 samples - at ../dataset/gen-word-540-count.jsonl\n"
+      "Generated JSONL file with - 705 max words, 50 samples - at ../dataset/gen-word-705-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 890 max words, 50 samples - at ../dataset/gen-word-890-count.jsonl\n"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 335 max words - at ../dataset/shuffle-word-335-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 695 max words, 50 samples - at ../dataset/gen-word-695-count.jsonl\n"
+      "Generated JSONL file with - 525 max words, 50 samples - at ../dataset/gen-word-525-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5800 max words - at ../dataset/shuffle-word-5800-count.jsonl\n"
+      "Generated JSONL file with - 820 max words, 50 samples - at ../dataset/gen-word-820-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7600 max words - at ../dataset/shuffle-word-7600-count.jsonl\n"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 305 max words - at ../dataset/shuffle-word-305-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 635 max words, 50 samples - at ../dataset/gen-word-635-count.jsonl\n"
+      "Generated JSONL file with - 270 max words, 50 samples - at ../dataset/gen-word-270-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 660 max words, 50 samples - at ../dataset/gen-word-660-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 830 max words - at ../dataset/shuffle-word-830-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 595 max words, 50 samples - at ../dataset/gen-word-595-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6000 max words - at ../dataset/shuffle-word-6000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 430 max words, 50 samples - at ../dataset/gen-word-430-count.jsonl\n"
+      "Generated JSONL file with - 920 max words, 50 samples - at ../dataset/gen-word-920-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 535 max words - at ../dataset/shuffle-word-535-count.jsonl\n"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 495 max words - at ../dataset/shuffle-word-495-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 220 max words, 50 samples - at ../dataset/gen-word-220-count.jsonl\n"
+      "Generated JSONL file with - 770 max words, 50 samples - at ../dataset/gen-word-770-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 725 max words, 50 samples - at ../dataset/gen-word-725-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4000 max words - at ../dataset/shuffle-word-4000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 960 max words - at ../dataset/shuffle-word-960-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 670 max words - at ../dataset/shuffle-word-670-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 735 max words, 50 samples - at ../dataset/gen-word-735-count.jsonl\n"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 635 max words - at ../dataset/shuffle-word-635-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 145 max words, 50 samples - at ../dataset/gen-word-145-count.jsonl\n"
+      "Generated JSONL file with - 725 max words, 50 samples - at ../dataset/gen-word-725-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 870 max words, 50 samples - at ../dataset/gen-word-870-count.jsonl\n"
+      "Generated JSONL file with - 275 max words, 50 samples - at ../dataset/gen-word-275-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 955 max words - at ../dataset/shuffle-word-955-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 870 max words - at ../dataset/shuffle-word-870-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6400 max words - at ../dataset/shuffle-word-6400-count.jsonl\n"
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 275 max words - at ../dataset/shuffle-word-275-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 360 max words, 50 samples - at ../dataset/gen-word-360-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 655 max words - at ../dataset/shuffle-word-655-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 685 max words, 50 samples - at ../dataset/gen-word-685-count.jsonl\n"
+      "Generated JSONL file with - 750 max words, 50 samples - at ../dataset/gen-word-750-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 625 max words, 50 samples - at ../dataset/gen-word-625-count.jsonl\n"
+      "Generated JSONL file with - 665 max words, 50 samples - at ../dataset/gen-word-665-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 405 max words, 50 samples - at ../dataset/gen-word-405-count.jsonl\n"
+      "Generated JSONL file with - 840 max words, 50 samples - at ../dataset/gen-word-840-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 715 max words, 50 samples - at ../dataset/gen-word-715-count.jsonl\n"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 450 max words - at ../dataset/shuffle-word-450-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 845 max words - at ../dataset/shuffle-word-845-count.jsonl\n"
+      "Generated JSONL file with - 865 max words, 50 samples - at ../dataset/gen-word-865-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 670 max words, 50 samples - at ../dataset/gen-word-670-count.jsonl\n"
+      "Generated JSONL file with - 815 max words, 50 samples - at ../dataset/gen-word-815-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 990 max words, 50 samples - at ../dataset/gen-word-990-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3000 max words - at ../dataset/shuffle-word-3000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5400 max words - at ../dataset/shuffle-word-5400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 515 max words - at ../dataset/shuffle-word-515-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 665 max words, 50 samples - at ../dataset/gen-word-665-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3400 max words - at ../dataset/shuffle-word-3400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 60 samples (20 token repeat) - 1100 max words - at ../dataset/shuffle-word-1100-count.jsonl\n"
+      "Generated JSONL file with - 710 max words, 50 samples - at ../dataset/gen-word-710-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 420 max words - at ../dataset/shuffle-word-420-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6400 max words - at ../dataset/shuffle-word-6400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1600 max words - at ../dataset/shuffle-word-1600-count.jsonl\n"
+      "Generated JSONL file with - 160 max words, 50 samples - at ../dataset/gen-word-160-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 515 max words, 50 samples - at ../dataset/gen-word-515-count.jsonl\n"
+      "Generated JSONL file with - 545 max words, 50 samples - at ../dataset/gen-word-545-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 925 max words, 50 samples - at ../dataset/gen-word-925-count.jsonl\n"
+      "Generated JSONL file with - 700 max words, 50 samples - at ../dataset/gen-word-700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 395 max words, 50 samples - at ../dataset/gen-word-395-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 855 max words - at ../dataset/shuffle-word-855-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 975 max words - at ../dataset/shuffle-word-975-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5300 max words - at ../dataset/shuffle-word-5300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 950 max words, 50 samples - at ../dataset/gen-word-950-count.jsonl\n"
+      "Generated JSONL file with - 190 max words, 50 samples - at ../dataset/gen-word-190-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 255 max words - at ../dataset/shuffle-word-255-count.jsonl\n"
+      "Generated JSONL file with - 975 max words, 50 samples - at ../dataset/gen-word-975-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4400 max words - at ../dataset/shuffle-word-4400-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4300 max words - at ../dataset/shuffle-word-4300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3900 max words - at ../dataset/shuffle-word-3900-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7200 max words - at ../dataset/shuffle-word-7200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4000 max words - at ../dataset/shuffle-word-4000-count.jsonl\n"
+      "Generated JSONL file with - 985 max words, 50 samples - at ../dataset/gen-word-985-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 215 max words, 50 samples - at ../dataset/gen-word-215-count.jsonl\n"
+      "Generated JSONL file with - 355 max words, 50 samples - at ../dataset/gen-word-355-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 505 max words - at ../dataset/shuffle-word-505-count.jsonl\n"
+      "Generated JSONL file with - 720 max words, 50 samples - at ../dataset/gen-word-720-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 975 max words, 50 samples - at ../dataset/gen-word-975-count.jsonl\n"
+      "Generated JSONL file with - 125 max words, 50 samples - at ../dataset/gen-word-125-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 2400 max words - at ../dataset/shuffle-word-2400-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6700 max words - at ../dataset/shuffle-word-6700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 2100 max words - at ../dataset/shuffle-word-2100-count.jsonl\n"
+      "Generated JSONL file with - 420 max words, 50 samples - at ../dataset/gen-word-420-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1400 max words - at ../dataset/shuffle-word-1400-count.jsonl\n"
+      "Generated JSONL file with - 395 max words, 50 samples - at ../dataset/gen-word-395-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1500 max words - at ../dataset/shuffle-word-1500-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4900 max words - at ../dataset/shuffle-word-4900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5600 max words - at ../dataset/shuffle-word-5600-count.jsonl\n"
+      "Generated JSONL file with - 435 max words, 50 samples - at ../dataset/gen-word-435-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 37 samples (20 token repeat) - 2500 max words - at ../dataset/shuffle-word-2500-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7900 max words - at ../dataset/shuffle-word-7900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7000 max words - at ../dataset/shuffle-word-7000-count.jsonl\n"
+      "Generated JSONL file with - 785 max words, 50 samples - at ../dataset/gen-word-785-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 910 max words - at ../dataset/shuffle-word-910-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 850 max words - at ../dataset/shuffle-word-850-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5300 max words - at ../dataset/shuffle-word-5300-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 8000 max words - at ../dataset/shuffle-word-8000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5400 max words - at ../dataset/shuffle-word-5400-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7800 max words - at ../dataset/shuffle-word-7800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 450 max words, 50 samples - at ../dataset/gen-word-450-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5200 max words - at ../dataset/shuffle-word-5200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 520 max words, 50 samples - at ../dataset/gen-word-520-count.jsonl\n"
+      "Generated JSONL file with - 965 max words, 50 samples - at ../dataset/gen-word-965-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4800 max words - at ../dataset/shuffle-word-4800-count.jsonl\n"
+      "Generated JSONL file with - 775 max words, 50 samples - at ../dataset/gen-word-775-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 825 max words, 50 samples - at ../dataset/gen-word-825-count.jsonl\n"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 525 max words - at ../dataset/shuffle-word-525-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7700 max words - at ../dataset/shuffle-word-7700-count.jsonl\n"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 780 max words - at ../dataset/shuffle-word-780-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 985 max words, 50 samples - at ../dataset/gen-word-985-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5900 max words - at ../dataset/shuffle-word-5900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6200 max words - at ../dataset/shuffle-word-6200-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5700 max words - at ../dataset/shuffle-word-5700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4300 max words - at ../dataset/shuffle-word-4300-count.jsonl\n"
+      "Generated JSONL file with - 495 max words, 50 samples - at ../dataset/gen-word-495-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 600 max words - at ../dataset/shuffle-word-600-count.jsonl\n"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 930 max words - at ../dataset/shuffle-word-930-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 510 max words - at ../dataset/shuffle-word-510-count.jsonl\n"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 420 max words - at ../dataset/shuffle-word-420-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7900 max words - at ../dataset/shuffle-word-7900-count.jsonl\n"
+      "Generated JSONL file with - 250 max words, 50 samples - at ../dataset/gen-word-250-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 995 max words, 50 samples - at ../dataset/gen-word-995-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4200 max words - at ../dataset/shuffle-word-4200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7500 max words - at ../dataset/shuffle-word-7500-count.jsonl\n"
+      "Generated a single JSONL file with 24 samples (20 token repeat) - 2700 max words - at ../dataset/shuffle-word-2700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 730 max words - at ../dataset/shuffle-word-730-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4600 max words - at ../dataset/shuffle-word-4600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 955 max words, 50 samples - at ../dataset/gen-word-955-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6800 max words - at ../dataset/shuffle-word-6800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 855 max words, 50 samples - at ../dataset/gen-word-855-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6600 max words - at ../dataset/shuffle-word-6600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6100 max words - at ../dataset/shuffle-word-6100-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3300 max words - at ../dataset/shuffle-word-3300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 585 max words, 50 samples - at ../dataset/gen-word-585-count.jsonl\n"
+      "Generated a single JSONL file with 32 samples (20 token repeat) - 2600 max words - at ../dataset/shuffle-word-2600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 31 samples (20 token repeat) - 2600 max words - at ../dataset/shuffle-word-2600-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7300 max words - at ../dataset/shuffle-word-7300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 460 max words - at ../dataset/shuffle-word-460-count.jsonl\n"
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1900 max words - at ../dataset/shuffle-word-1900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 2000 max words - at ../dataset/shuffle-word-2000-count.jsonl\n"
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1400 max words - at ../dataset/shuffle-word-1400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 590 max words, 50 samples - at ../dataset/gen-word-590-count.jsonl\n"
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1100 max words - at ../dataset/shuffle-word-1100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5700 max words - at ../dataset/shuffle-word-5700-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6300 max words - at ../dataset/shuffle-word-6300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 345 max words, 50 samples - at ../dataset/gen-word-345-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6100 max words - at ../dataset/shuffle-word-6100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 790 max words - at ../dataset/shuffle-word-790-count.jsonl\n"
+      "Generated JSONL file with - 205 max words, 50 samples - at ../dataset/gen-word-205-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 460 max words, 50 samples - at ../dataset/gen-word-460-count.jsonl\n"
+      "Generated JSONL file with - 400 max words, 50 samples - at ../dataset/gen-word-400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 620 max words - at ../dataset/shuffle-word-620-count.jsonl\n"
+      "Generated JSONL file with - 410 max words, 50 samples - at ../dataset/gen-word-410-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 805 max words - at ../dataset/shuffle-word-805-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7700 max words - at ../dataset/shuffle-word-7700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1800 max words - at ../dataset/shuffle-word-1800-count.jsonl\n"
+      "Generated JSONL file with - 730 max words, 50 samples - at ../dataset/gen-word-730-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 490 max words - at ../dataset/shuffle-word-490-count.jsonl\n"
+      "Generated JSONL file with - 350 max words, 50 samples - at ../dataset/gen-word-350-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 640 max words, 50 samples - at ../dataset/gen-word-640-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6500 max words - at ../dataset/shuffle-word-6500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 2200 max words - at ../dataset/shuffle-word-2200-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2800 max words - at ../dataset/shuffle-word-2800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 720 max words, 50 samples - at ../dataset/gen-word-720-count.jsonl\n"
+      "Generated JSONL file with - 635 max words, 50 samples - at ../dataset/gen-word-635-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 540 max words - at ../dataset/shuffle-word-540-count.jsonl\n"
+      "Generated JSONL file with - 780 max words, 50 samples - at ../dataset/gen-word-780-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 615 max words, 50 samples - at ../dataset/gen-word-615-count.jsonl\n"
+      "Generated JSONL file with - 855 max words, 50 samples - at ../dataset/gen-word-855-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 545 max words - at ../dataset/shuffle-word-545-count.jsonl\n"
+      "Generated JSONL file with - 995 max words, 50 samples - at ../dataset/gen-word-995-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4100 max words - at ../dataset/shuffle-word-4100-count.jsonl\n"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4800 max words - at ../dataset/shuffle-word-4800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 475 max words - at ../dataset/shuffle-word-475-count.jsonl\n"
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2300 max words - at ../dataset/shuffle-word-2300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 755 max words, 50 samples - at ../dataset/gen-word-755-count.jsonl\n"
+      "Generated JSONL file with - 1100 max words, 2000 samples - at ../dataset/gen-word-1100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 795 max words, 50 samples - at ../dataset/gen-word-795-count.jsonl\n"
+      "Generated JSONL file with - 1200 max words, 2000 samples - at ../dataset/gen-word-1200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1100 max words, 2000 samples - at ../dataset/gen-word-1100-count.jsonl\n"
+      "Generated JSONL file with - 1300 max words, 2000 samples - at ../dataset/gen-word-1300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1300 max words, 2000 samples - at ../dataset/gen-word-1300-count.jsonl\n"
+      "Generated JSONL file with - 1500 max words, 2000 samples - at ../dataset/gen-word-1500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1400 max words, 2000 samples - at ../dataset/gen-word-1400-count.jsonl\n"
+      "Generated JSONL file with - 1600 max words, 2000 samples - at ../dataset/gen-word-1600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1500 max words, 2000 samples - at ../dataset/gen-word-1500-count.jsonl\n"
+      "Generated JSONL file with - 1400 max words, 2000 samples - at ../dataset/gen-word-1400-count.jsonl\n"
      ]
     },
     {
@@ -3639,28 +3659,28 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1800 max words, 2000 samples - at ../dataset/gen-word-1800-count.jsonl\n"
+      "Generated JSONL file with - 2000 max words, 2000 samples - at ../dataset/gen-word-2000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1900 max words, 2000 samples - at ../dataset/gen-word-1900-count.jsonl\n"
+      "Generated JSONL file with - 1800 max words, 2000 samples - at ../dataset/gen-word-1800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1200 max words, 2000 samples - at ../dataset/gen-word-1200-count.jsonl\n"
+      "Generated JSONL file with - 1900 max words, 2000 samples - at ../dataset/gen-word-1900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 1600 max words, 2000 samples - at ../dataset/gen-word-1600-count.jsonl\n"
+      "Generated JSONL file with - 2300 max words, 2000 samples - at ../dataset/gen-word-2300-count.jsonl\n"
      ]
     },
     {
@@ -3681,21 +3701,21 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2300 max words, 2000 samples - at ../dataset/gen-word-2300-count.jsonl\n"
+      "Generated JSONL file with - 2400 max words, 2000 samples - at ../dataset/gen-word-2400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2400 max words, 2000 samples - at ../dataset/gen-word-2400-count.jsonl\n"
+      "Generated JSONL file with - 2500 max words, 2000 samples - at ../dataset/gen-word-2500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2800 max words, 2000 samples - at ../dataset/gen-word-2800-count.jsonl\n"
+      "Generated JSONL file with - 2700 max words, 2000 samples - at ../dataset/gen-word-2700-count.jsonl\n"
      ]
     },
     {
@@ -3709,7 +3729,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2700 max words, 2000 samples - at ../dataset/gen-word-2700-count.jsonl\n"
+      "Generated JSONL file with - 2800 max words, 2000 samples - at ../dataset/gen-word-2800-count.jsonl\n"
      ]
     },
     {
@@ -3723,14 +3743,14 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2000 max words, 2000 samples - at ../dataset/gen-word-2000-count.jsonl\n"
+      "Generated JSONL file with - 3200 max words, 2000 samples - at ../dataset/gen-word-3200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3000 max words, 2000 samples - at ../dataset/gen-word-3000-count.jsonl\n"
+      "Generated JSONL file with - 3100 max words, 2000 samples - at ../dataset/gen-word-3100-count.jsonl\n"
      ]
     },
     {
@@ -3744,119 +3764,119 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 2500 max words, 2000 samples - at ../dataset/gen-word-2500-count.jsonl\n"
+      "Generated JSONL file with - 3300 max words, 2000 samples - at ../dataset/gen-word-3300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3300 max words, 2000 samples - at ../dataset/gen-word-3300-count.jsonl\n"
+      "Generated JSONL file with - 3600 max words, 2000 samples - at ../dataset/gen-word-3600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3500 max words, 2000 samples - at ../dataset/gen-word-3500-count.jsonl\n"
+      "Generated JSONL file with - 3000 max words, 2000 samples - at ../dataset/gen-word-3000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3600 max words, 2000 samples - at ../dataset/gen-word-3600-count.jsonl\n"
+      "Generated JSONL file with - 3800 max words, 2000 samples - at ../dataset/gen-word-3800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3700 max words, 2000 samples - at ../dataset/gen-word-3700-count.jsonl\n"
+      "Generated JSONL file with - 4000 max words, 2000 samples - at ../dataset/gen-word-4000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3100 max words, 2000 samples - at ../dataset/gen-word-3100-count.jsonl\n"
+      "Generated JSONL file with - 3500 max words, 2000 samples - at ../dataset/gen-word-3500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3800 max words, 2000 samples - at ../dataset/gen-word-3800-count.jsonl\n"
+      "Generated JSONL file with - 4100 max words, 2000 samples - at ../dataset/gen-word-4100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3900 max words, 2000 samples - at ../dataset/gen-word-3900-count.jsonl\n"
+      "Generated JSONL file with - 4600 max words, 2000 samples - at ../dataset/gen-word-4600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4200 max words, 2000 samples - at ../dataset/gen-word-4200-count.jsonl\n"
+      "Generated JSONL file with - 4700 max words, 2000 samples - at ../dataset/gen-word-4700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4100 max words, 2000 samples - at ../dataset/gen-word-4100-count.jsonl\n"
+      "Generated JSONL file with - 3900 max words, 2000 samples - at ../dataset/gen-word-3900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4000 max words, 2000 samples - at ../dataset/gen-word-4000-count.jsonl\n"
+      "Generated JSONL file with - 4400 max words, 2000 samples - at ../dataset/gen-word-4400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 3200 max words, 2000 samples - at ../dataset/gen-word-3200-count.jsonl\n"
+      "Generated JSONL file with - 3700 max words, 2000 samples - at ../dataset/gen-word-3700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5000 max words, 2000 samples - at ../dataset/gen-word-5000-count.jsonl\n"
+      "Generated JSONL file with - 5600 max words, 2000 samples - at ../dataset/gen-word-5600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4300 max words, 2000 samples - at ../dataset/gen-word-4300-count.jsonl\n"
+      "Generated JSONL file with - 4900 max words, 2000 samples - at ../dataset/gen-word-4900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4600 max words, 2000 samples - at ../dataset/gen-word-4600-count.jsonl\n"
+      "Generated JSONL file with - 4200 max words, 2000 samples - at ../dataset/gen-word-4200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4500 max words, 2000 samples - at ../dataset/gen-word-4500-count.jsonl\n"
+      "Generated JSONL file with - 5000 max words, 2000 samples - at ../dataset/gen-word-5000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5300 max words, 2000 samples - at ../dataset/gen-word-5300-count.jsonl\n"
+      "Generated JSONL file with - 4500 max words, 2000 samples - at ../dataset/gen-word-4500-count.jsonl\n"
      ]
     },
     {
@@ -3870,21 +3890,21 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5400 max words, 2000 samples - at ../dataset/gen-word-5400-count.jsonl\n"
+      "Generated JSONL file with - 4300 max words, 2000 samples - at ../dataset/gen-word-4300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4900 max words, 2000 samples - at ../dataset/gen-word-4900-count.jsonl\n"
+      "Generated JSONL file with - 5800 max words, 2000 samples - at ../dataset/gen-word-5800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 4400 max words, 2000 samples - at ../dataset/gen-word-4400-count.jsonl\n"
+      "Generated JSONL file with - 5100 max words, 2000 samples - at ../dataset/gen-word-5100-count.jsonl\n"
      ]
     },
     {
@@ -3898,21 +3918,14 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6000 max words, 2000 samples - at ../dataset/gen-word-6000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 4700 max words, 2000 samples - at ../dataset/gen-word-4700-count.jsonl\n"
+      "Generated JSONL file with - 6300 max words, 2000 samples - at ../dataset/gen-word-6300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5500 max words, 2000 samples - at ../dataset/gen-word-5500-count.jsonl\n"
+      "Generated JSONL file with - 5300 max words, 2000 samples - at ../dataset/gen-word-5300-count.jsonl\n"
      ]
     },
     {
@@ -3926,70 +3939,70 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6300 max words, 2000 samples - at ../dataset/gen-word-6300-count.jsonl\n"
+      "Generated JSONL file with - 5500 max words, 2000 samples - at ../dataset/gen-word-5500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6400 max words, 2000 samples - at ../dataset/gen-word-6400-count.jsonl\n"
+      "Generated JSONL file with - 6200 max words, 2000 samples - at ../dataset/gen-word-6200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5800 max words, 2000 samples - at ../dataset/gen-word-5800-count.jsonl\n"
+      "Generated JSONL file with - 6100 max words, 2000 samples - at ../dataset/gen-word-6100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5600 max words, 2000 samples - at ../dataset/gen-word-5600-count.jsonl\n"
+      "Generated JSONL file with - 6000 max words, 2000 samples - at ../dataset/gen-word-6000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6200 max words, 2000 samples - at ../dataset/gen-word-6200-count.jsonl\n"
+      "Generated JSONL file with - 5900 max words, 2000 samples - at ../dataset/gen-word-5900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5100 max words, 2000 samples - at ../dataset/gen-word-5100-count.jsonl\n"
+      "Generated JSONL file with - 5400 max words, 2000 samples - at ../dataset/gen-word-5400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6500 max words, 2000 samples - at ../dataset/gen-word-6500-count.jsonl\n"
+      "Generated JSONL file with - 6400 max words, 2000 samples - at ../dataset/gen-word-6400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6100 max words, 2000 samples - at ../dataset/gen-word-6100-count.jsonl\n"
+      "Generated JSONL file with - 7500 max words, 2000 samples - at ../dataset/gen-word-7500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7200 max words, 2000 samples - at ../dataset/gen-word-7200-count.jsonl\n"
+      "Generated JSONL file with - 7400 max words, 2000 samples - at ../dataset/gen-word-7400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 5900 max words, 2000 samples - at ../dataset/gen-word-5900-count.jsonl\n"
+      "Generated JSONL file with - 7000 max words, 2000 samples - at ../dataset/gen-word-7000-count.jsonl\n"
      ]
     },
     {
@@ -4003,7 +4016,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6700 max words, 2000 samples - at ../dataset/gen-word-6700-count.jsonl\n"
+      "Generated JSONL file with - 6500 max words, 2000 samples - at ../dataset/gen-word-6500-count.jsonl\n"
      ]
     },
     {
@@ -4017,21 +4030,21 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7300 max words, 2000 samples - at ../dataset/gen-word-7300-count.jsonl\n"
+      "Generated JSONL file with - 6800 max words, 2000 samples - at ../dataset/gen-word-6800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7000 max words, 2000 samples - at ../dataset/gen-word-7000-count.jsonl\n"
+      "Generated JSONL file with - 7200 max words, 2000 samples - at ../dataset/gen-word-7200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 6800 max words, 2000 samples - at ../dataset/gen-word-6800-count.jsonl\n"
+      "Generated JSONL file with - 7700 max words, 2000 samples - at ../dataset/gen-word-7700-count.jsonl\n"
      ]
     },
     {
@@ -4045,14 +4058,14 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7400 max words, 2000 samples - at ../dataset/gen-word-7400-count.jsonl\n"
+      "Generated JSONL file with - 6700 max words, 2000 samples - at ../dataset/gen-word-6700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7500 max words, 2000 samples - at ../dataset/gen-word-7500-count.jsonl\n"
+      "Generated JSONL file with - 7300 max words, 2000 samples - at ../dataset/gen-word-7300-count.jsonl\n"
      ]
     },
     {
@@ -4066,21 +4079,14 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 7800 max words, 2000 samples - at ../dataset/gen-word-7800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 7700 max words, 2000 samples - at ../dataset/gen-word-7700-count.jsonl\n"
+      "Generated JSONL file with - 8000 max words, 2000 samples - at ../dataset/gen-word-8000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated JSONL file with - 8000 max words, 2000 samples - at ../dataset/gen-word-8000-count.jsonl\n"
+      "Generated JSONL file with - 7800 max words, 2000 samples - at ../dataset/gen-word-7800-count.jsonl\n"
      ]
     },
     {
@@ -4108,3787 +4114,3787 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  11K Sep  1 14:16 gen-word-10-count.jsonl\n"
+      "-rw-r--r-- 1 root root 9.5K Sep  1 14:53 gen-word-10-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  54K Sep  1 14:16 gen-word-100-count.jsonl\n"
+      "-rw-r--r-- 1 root root  52K Sep  1 14:53 gen-word-100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 490K Sep  1 14:16 gen-word-1000-count.jsonl\n"
+      "-rw-r--r-- 1 root root 489K Sep  1 14:53 gen-word-1000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  56K Sep  1 14:16 gen-word-105-count.jsonl\n"
+      "-rw-r--r-- 1 root root  56K Sep  1 14:53 gen-word-105-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  59K Sep  1 14:16 gen-word-110-count.jsonl\n"
+      "-rw-r--r-- 1 root root  59K Sep  1 14:53 gen-word-110-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  22M Sep  1 14:16 gen-word-1100-count.jsonl\n"
+      "-rw-r--r-- 1 root root  22M Sep  1 14:53 gen-word-1100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  63K Sep  1 14:16 gen-word-115-count.jsonl\n"
+      "-rw-r--r-- 1 root root  61K Sep  1 14:53 gen-word-115-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  65K Sep  1 14:16 gen-word-120-count.jsonl\n"
+      "-rw-r--r-- 1 root root  63K Sep  1 14:53 gen-word-120-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  23M Sep  1 14:16 gen-word-1200-count.jsonl\n"
+      "-rw-r--r-- 1 root root  23M Sep  1 14:53 gen-word-1200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  66K Sep  1 14:16 gen-word-125-count.jsonl\n"
+      "-rw-r--r-- 1 root root  69K Sep  1 14:53 gen-word-125-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  69K Sep  1 14:16 gen-word-130-count.jsonl\n"
+      "-rw-r--r-- 1 root root  69K Sep  1 14:53 gen-word-130-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  25M Sep  1 14:16 gen-word-1300-count.jsonl\n"
+      "-rw-r--r-- 1 root root  25M Sep  1 14:53 gen-word-1300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  69K Sep  1 14:16 gen-word-135-count.jsonl\n"
+      "-rw-r--r-- 1 root root  69K Sep  1 14:53 gen-word-135-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  75K Sep  1 14:16 gen-word-140-count.jsonl\n"
+      "-rw-r--r-- 1 root root  71K Sep  1 14:53 gen-word-140-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27M Sep  1 14:16 gen-word-1400-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27M Sep  1 14:53 gen-word-1400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  76K Sep  1 14:16 gen-word-145-count.jsonl\n"
+      "-rw-r--r-- 1 root root  76K Sep  1 14:53 gen-word-145-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  13K Sep  1 14:16 gen-word-15-count.jsonl\n"
+      "-rw-r--r-- 1 root root  12K Sep  1 14:53 gen-word-15-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  79K Sep  1 14:16 gen-word-150-count.jsonl\n"
+      "-rw-r--r-- 1 root root  78K Sep  1 14:53 gen-word-150-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  29M Sep  1 14:16 gen-word-1500-count.jsonl\n"
+      "-rw-r--r-- 1 root root  29M Sep  1 14:53 gen-word-1500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  80K Sep  1 14:16 gen-word-155-count.jsonl\n"
+      "-rw-r--r-- 1 root root  80K Sep  1 14:53 gen-word-155-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  84K Sep  1 14:16 gen-word-160-count.jsonl\n"
+      "-rw-r--r-- 1 root root  83K Sep  1 14:53 gen-word-160-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  31M Sep  1 14:16 gen-word-1600-count.jsonl\n"
+      "-rw-r--r-- 1 root root  31M Sep  1 14:53 gen-word-1600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  85K Sep  1 14:16 gen-word-165-count.jsonl\n"
+      "-rw-r--r-- 1 root root  84K Sep  1 14:53 gen-word-165-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  89K Sep  1 14:16 gen-word-170-count.jsonl\n"
+      "-rw-r--r-- 1 root root  86K Sep  1 14:53 gen-word-170-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  33M Sep  1 14:16 gen-word-1700-count.jsonl\n"
+      "-rw-r--r-- 1 root root  33M Sep  1 14:53 gen-word-1700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  90K Sep  1 14:16 gen-word-175-count.jsonl\n"
+      "-rw-r--r-- 1 root root  91K Sep  1 14:53 gen-word-175-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  93K Sep  1 14:16 gen-word-180-count.jsonl\n"
+      "-rw-r--r-- 1 root root  91K Sep  1 14:53 gen-word-180-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  35M Sep  1 14:16 gen-word-1800-count.jsonl\n"
+      "-rw-r--r-- 1 root root  35M Sep  1 14:53 gen-word-1800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  95K Sep  1 14:16 gen-word-185-count.jsonl\n"
+      "-rw-r--r-- 1 root root  94K Sep  1 14:53 gen-word-185-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  97K Sep  1 14:16 gen-word-190-count.jsonl\n"
+      "-rw-r--r-- 1 root root  97K Sep  1 14:53 gen-word-190-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  37M Sep  1 14:16 gen-word-1900-count.jsonl\n"
+      "-rw-r--r-- 1 root root  37M Sep  1 14:53 gen-word-1900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 100K Sep  1 14:16 gen-word-195-count.jsonl\n"
+      "-rw-r--r-- 1 root root 100K Sep  1 14:53 gen-word-195-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  15K Sep  1 14:16 gen-word-20-count.jsonl\n"
+      "-rw-r--r-- 1 root root  15K Sep  1 14:53 gen-word-20-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 101K Sep  1 14:16 gen-word-200-count.jsonl\n"
+      "-rw-r--r-- 1 root root 103K Sep  1 14:53 gen-word-200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  39M Sep  1 14:16 gen-word-2000-count.jsonl\n"
+      "-rw-r--r-- 1 root root  39M Sep  1 14:53 gen-word-2000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 104K Sep  1 14:16 gen-word-205-count.jsonl\n"
+      "-rw-r--r-- 1 root root 105K Sep  1 14:53 gen-word-205-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 108K Sep  1 14:16 gen-word-210-count.jsonl\n"
+      "-rw-r--r-- 1 root root 108K Sep  1 14:53 gen-word-210-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  41M Sep  1 14:16 gen-word-2100-count.jsonl\n"
+      "-rw-r--r-- 1 root root  41M Sep  1 14:53 gen-word-2100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 110K Sep  1 14:16 gen-word-215-count.jsonl\n"
+      "-rw-r--r-- 1 root root 108K Sep  1 14:53 gen-word-215-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 111K Sep  1 14:16 gen-word-220-count.jsonl\n"
+      "-rw-r--r-- 1 root root 114K Sep  1 14:53 gen-word-220-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  42M Sep  1 14:16 gen-word-2200-count.jsonl\n"
+      "-rw-r--r-- 1 root root  43M Sep  1 14:53 gen-word-2200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 114K Sep  1 14:16 gen-word-225-count.jsonl\n"
+      "-rw-r--r-- 1 root root 115K Sep  1 14:53 gen-word-225-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 116K Sep  1 14:16 gen-word-230-count.jsonl\n"
+      "-rw-r--r-- 1 root root 118K Sep  1 14:53 gen-word-230-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  44M Sep  1 14:16 gen-word-2300-count.jsonl\n"
+      "-rw-r--r-- 1 root root  44M Sep  1 14:53 gen-word-2300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 123K Sep  1 14:16 gen-word-235-count.jsonl\n"
+      "-rw-r--r-- 1 root root 120K Sep  1 14:53 gen-word-235-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 122K Sep  1 14:16 gen-word-240-count.jsonl\n"
+      "-rw-r--r-- 1 root root 122K Sep  1 14:53 gen-word-240-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  46M Sep  1 14:16 gen-word-2400-count.jsonl\n"
+      "-rw-r--r-- 1 root root  46M Sep  1 14:53 gen-word-2400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 125K Sep  1 14:16 gen-word-245-count.jsonl\n"
+      "-rw-r--r-- 1 root root 125K Sep  1 14:53 gen-word-245-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  17K Sep  1 14:16 gen-word-25-count.jsonl\n"
+      "-rw-r--r-- 1 root root  18K Sep  1 14:53 gen-word-25-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 126K Sep  1 14:16 gen-word-250-count.jsonl\n"
+      "-rw-r--r-- 1 root root 127K Sep  1 14:53 gen-word-250-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  48M Sep  1 14:16 gen-word-2500-count.jsonl\n"
+      "-rw-r--r-- 1 root root  48M Sep  1 14:53 gen-word-2500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 127K Sep  1 14:16 gen-word-255-count.jsonl\n"
+      "-rw-r--r-- 1 root root 131K Sep  1 14:53 gen-word-255-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 131K Sep  1 14:16 gen-word-260-count.jsonl\n"
+      "-rw-r--r-- 1 root root 129K Sep  1 14:53 gen-word-260-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  50M Sep  1 14:16 gen-word-2600-count.jsonl\n"
+      "-rw-r--r-- 1 root root  50M Sep  1 14:53 gen-word-2600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 132K Sep  1 14:16 gen-word-265-count.jsonl\n"
+      "-rw-r--r-- 1 root root 132K Sep  1 14:53 gen-word-265-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 139K Sep  1 14:16 gen-word-270-count.jsonl\n"
+      "-rw-r--r-- 1 root root 136K Sep  1 14:53 gen-word-270-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  52M Sep  1 14:16 gen-word-2700-count.jsonl\n"
+      "-rw-r--r-- 1 root root  52M Sep  1 14:53 gen-word-2700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 140K Sep  1 14:16 gen-word-275-count.jsonl\n"
+      "-rw-r--r-- 1 root root 137K Sep  1 14:53 gen-word-275-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 143K Sep  1 14:16 gen-word-280-count.jsonl\n"
+      "-rw-r--r-- 1 root root 143K Sep  1 14:53 gen-word-280-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  54M Sep  1 14:16 gen-word-2800-count.jsonl\n"
+      "-rw-r--r-- 1 root root  54M Sep  1 14:53 gen-word-2800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 139K Sep  1 14:16 gen-word-285-count.jsonl\n"
+      "-rw-r--r-- 1 root root 142K Sep  1 14:53 gen-word-285-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 142K Sep  1 14:16 gen-word-290-count.jsonl\n"
+      "-rw-r--r-- 1 root root 145K Sep  1 14:53 gen-word-290-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  56M Sep  1 14:16 gen-word-2900-count.jsonl\n"
+      "-rw-r--r-- 1 root root  56M Sep  1 14:53 gen-word-2900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 147K Sep  1 14:16 gen-word-295-count.jsonl\n"
+      "-rw-r--r-- 1 root root 148K Sep  1 14:53 gen-word-295-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  20K Sep  1 14:16 gen-word-30-count.jsonl\n"
+      "-rw-r--r-- 1 root root  20K Sep  1 14:53 gen-word-30-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 147K Sep  1 14:16 gen-word-300-count.jsonl\n"
+      "-rw-r--r-- 1 root root 147K Sep  1 14:53 gen-word-300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  58M Sep  1 14:16 gen-word-3000-count.jsonl\n"
+      "-rw-r--r-- 1 root root  58M Sep  1 14:53 gen-word-3000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 154K Sep  1 14:16 gen-word-305-count.jsonl\n"
+      "-rw-r--r-- 1 root root 153K Sep  1 14:53 gen-word-305-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 153K Sep  1 14:16 gen-word-310-count.jsonl\n"
+      "-rw-r--r-- 1 root root 156K Sep  1 14:53 gen-word-310-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  60M Sep  1 14:16 gen-word-3100-count.jsonl\n"
+      "-rw-r--r-- 1 root root  60M Sep  1 14:53 gen-word-3100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 157K Sep  1 14:16 gen-word-315-count.jsonl\n"
+      "-rw-r--r-- 1 root root 161K Sep  1 14:53 gen-word-315-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 160K Sep  1 14:16 gen-word-320-count.jsonl\n"
+      "-rw-r--r-- 1 root root 158K Sep  1 14:53 gen-word-320-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  61M Sep  1 14:16 gen-word-3200-count.jsonl\n"
+      "-rw-r--r-- 1 root root  61M Sep  1 14:53 gen-word-3200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 165K Sep  1 14:16 gen-word-325-count.jsonl\n"
+      "-rw-r--r-- 1 root root 160K Sep  1 14:53 gen-word-325-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 166K Sep  1 14:16 gen-word-330-count.jsonl\n"
+      "-rw-r--r-- 1 root root 164K Sep  1 14:53 gen-word-330-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  63M Sep  1 14:16 gen-word-3300-count.jsonl\n"
+      "-rw-r--r-- 1 root root  63M Sep  1 14:53 gen-word-3300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 166K Sep  1 14:16 gen-word-335-count.jsonl\n"
+      "-rw-r--r-- 1 root root 172K Sep  1 14:53 gen-word-335-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 172K Sep  1 14:16 gen-word-340-count.jsonl\n"
+      "-rw-r--r-- 1 root root 176K Sep  1 14:53 gen-word-340-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  65M Sep  1 14:16 gen-word-3400-count.jsonl\n"
+      "-rw-r--r-- 1 root root  65M Sep  1 14:53 gen-word-3400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 174K Sep  1 14:16 gen-word-345-count.jsonl\n"
+      "-rw-r--r-- 1 root root 176K Sep  1 14:53 gen-word-345-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  23K Sep  1 14:16 gen-word-35-count.jsonl\n"
+      "-rw-r--r-- 1 root root  22K Sep  1 14:53 gen-word-35-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 177K Sep  1 14:16 gen-word-350-count.jsonl\n"
+      "-rw-r--r-- 1 root root 176K Sep  1 14:53 gen-word-350-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  67M Sep  1 14:16 gen-word-3500-count.jsonl\n"
+      "-rw-r--r-- 1 root root  67M Sep  1 14:53 gen-word-3500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 177K Sep  1 14:16 gen-word-355-count.jsonl\n"
+      "-rw-r--r-- 1 root root 178K Sep  1 14:53 gen-word-355-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 181K Sep  1 14:16 gen-word-360-count.jsonl\n"
+      "-rw-r--r-- 1 root root 179K Sep  1 14:53 gen-word-360-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  69M Sep  1 14:16 gen-word-3600-count.jsonl\n"
+      "-rw-r--r-- 1 root root  69M Sep  1 14:53 gen-word-3600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 180K Sep  1 14:16 gen-word-365-count.jsonl\n"
+      "-rw-r--r-- 1 root root 186K Sep  1 14:53 gen-word-365-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 187K Sep  1 14:16 gen-word-370-count.jsonl\n"
+      "-rw-r--r-- 1 root root 179K Sep  1 14:53 gen-word-370-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  71M Sep  1 14:16 gen-word-3700-count.jsonl\n"
+      "-rw-r--r-- 1 root root  71M Sep  1 14:53 gen-word-3700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 184K Sep  1 14:16 gen-word-375-count.jsonl\n"
+      "-rw-r--r-- 1 root root 186K Sep  1 14:53 gen-word-375-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 190K Sep  1 14:16 gen-word-380-count.jsonl\n"
+      "-rw-r--r-- 1 root root 190K Sep  1 14:53 gen-word-380-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  73M Sep  1 14:16 gen-word-3800-count.jsonl\n"
+      "-rw-r--r-- 1 root root  73M Sep  1 14:53 gen-word-3800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 187K Sep  1 14:16 gen-word-385-count.jsonl\n"
+      "-rw-r--r-- 1 root root 192K Sep  1 14:53 gen-word-385-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 194K Sep  1 14:16 gen-word-390-count.jsonl\n"
+      "-rw-r--r-- 1 root root 196K Sep  1 14:53 gen-word-390-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  75M Sep  1 14:16 gen-word-3900-count.jsonl\n"
+      "-rw-r--r-- 1 root root  75M Sep  1 14:53 gen-word-3900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 194K Sep  1 14:16 gen-word-395-count.jsonl\n"
+      "-rw-r--r-- 1 root root 194K Sep  1 14:53 gen-word-395-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  25K Sep  1 14:16 gen-word-40-count.jsonl\n"
+      "-rw-r--r-- 1 root root  24K Sep  1 14:53 gen-word-40-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 200K Sep  1 14:16 gen-word-400-count.jsonl\n"
+      "-rw-r--r-- 1 root root 196K Sep  1 14:53 gen-word-400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  77M Sep  1 14:16 gen-word-4000-count.jsonl\n"
+      "-rw-r--r-- 1 root root  77M Sep  1 14:53 gen-word-4000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 203K Sep  1 14:16 gen-word-405-count.jsonl\n"
+      "-rw-r--r-- 1 root root 201K Sep  1 14:53 gen-word-405-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 207K Sep  1 14:16 gen-word-410-count.jsonl\n"
+      "-rw-r--r-- 1 root root 204K Sep  1 14:53 gen-word-410-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  79M Sep  1 14:16 gen-word-4100-count.jsonl\n"
+      "-rw-r--r-- 1 root root  79M Sep  1 14:53 gen-word-4100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 203K Sep  1 14:16 gen-word-415-count.jsonl\n"
+      "-rw-r--r-- 1 root root 210K Sep  1 14:53 gen-word-415-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 205K Sep  1 14:16 gen-word-420-count.jsonl\n"
+      "-rw-r--r-- 1 root root 209K Sep  1 14:53 gen-word-420-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  81M Sep  1 14:16 gen-word-4200-count.jsonl\n"
+      "-rw-r--r-- 1 root root  80M Sep  1 14:53 gen-word-4200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 216K Sep  1 14:16 gen-word-425-count.jsonl\n"
+      "-rw-r--r-- 1 root root 214K Sep  1 14:53 gen-word-425-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 213K Sep  1 14:16 gen-word-430-count.jsonl\n"
+      "-rw-r--r-- 1 root root 215K Sep  1 14:53 gen-word-430-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  82M Sep  1 14:16 gen-word-4300-count.jsonl\n"
+      "-rw-r--r-- 1 root root  82M Sep  1 14:53 gen-word-4300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 220K Sep  1 14:16 gen-word-435-count.jsonl\n"
+      "-rw-r--r-- 1 root root 218K Sep  1 14:53 gen-word-435-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 223K Sep  1 14:16 gen-word-440-count.jsonl\n"
+      "-rw-r--r-- 1 root root 218K Sep  1 14:53 gen-word-440-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  84M Sep  1 14:16 gen-word-4400-count.jsonl\n"
+      "-rw-r--r-- 1 root root  84M Sep  1 14:53 gen-word-4400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 223K Sep  1 14:16 gen-word-445-count.jsonl\n"
+      "-rw-r--r-- 1 root root 220K Sep  1 14:53 gen-word-445-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 gen-word-45-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 gen-word-45-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 220K Sep  1 14:16 gen-word-450-count.jsonl\n"
+      "-rw-r--r-- 1 root root 225K Sep  1 14:53 gen-word-450-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  86M Sep  1 14:16 gen-word-4500-count.jsonl\n"
+      "-rw-r--r-- 1 root root  86M Sep  1 14:53 gen-word-4500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 230K Sep  1 14:16 gen-word-455-count.jsonl\n"
+      "-rw-r--r-- 1 root root 226K Sep  1 14:53 gen-word-455-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 226K Sep  1 14:16 gen-word-460-count.jsonl\n"
+      "-rw-r--r-- 1 root root 225K Sep  1 14:53 gen-word-460-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  88M Sep  1 14:16 gen-word-4600-count.jsonl\n"
+      "-rw-r--r-- 1 root root  88M Sep  1 14:53 gen-word-4600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 233K Sep  1 14:16 gen-word-465-count.jsonl\n"
+      "-rw-r--r-- 1 root root 227K Sep  1 14:53 gen-word-465-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 232K Sep  1 14:16 gen-word-470-count.jsonl\n"
+      "-rw-r--r-- 1 root root 234K Sep  1 14:53 gen-word-470-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  90M Sep  1 14:16 gen-word-4700-count.jsonl\n"
+      "-rw-r--r-- 1 root root  90M Sep  1 14:53 gen-word-4700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 237K Sep  1 14:16 gen-word-475-count.jsonl\n"
+      "-rw-r--r-- 1 root root 233K Sep  1 14:53 gen-word-475-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 238K Sep  1 14:16 gen-word-480-count.jsonl\n"
+      "-rw-r--r-- 1 root root 232K Sep  1 14:53 gen-word-480-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  92M Sep  1 14:16 gen-word-4800-count.jsonl\n"
+      "-rw-r--r-- 1 root root  92M Sep  1 14:53 gen-word-4800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 239K Sep  1 14:16 gen-word-485-count.jsonl\n"
+      "-rw-r--r-- 1 root root 238K Sep  1 14:53 gen-word-485-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 244K Sep  1 14:16 gen-word-490-count.jsonl\n"
+      "-rw-r--r-- 1 root root 244K Sep  1 14:53 gen-word-490-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  94M Sep  1 14:16 gen-word-4900-count.jsonl\n"
+      "-rw-r--r-- 1 root root  94M Sep  1 14:53 gen-word-4900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 246K Sep  1 14:16 gen-word-495-count.jsonl\n"
+      "-rw-r--r-- 1 root root 249K Sep  1 14:53 gen-word-495-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 7.6K Sep  1 14:16 gen-word-5-count.jsonl\n"
+      "-rw-r--r-- 1 root root 7.4K Sep  1 14:53 gen-word-5-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  30K Sep  1 14:16 gen-word-50-count.jsonl\n"
+      "-rw-r--r-- 1 root root  31K Sep  1 14:53 gen-word-50-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 243K Sep  1 14:16 gen-word-500-count.jsonl\n"
+      "-rw-r--r-- 1 root root 243K Sep  1 14:53 gen-word-500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  96M Sep  1 14:16 gen-word-5000-count.jsonl\n"
+      "-rw-r--r-- 1 root root  96M Sep  1 14:53 gen-word-5000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 252K Sep  1 14:16 gen-word-505-count.jsonl\n"
+      "-rw-r--r-- 1 root root 254K Sep  1 14:53 gen-word-505-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 254K Sep  1 14:16 gen-word-510-count.jsonl\n"
+      "-rw-r--r-- 1 root root 253K Sep  1 14:53 gen-word-510-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  97M Sep  1 14:16 gen-word-5100-count.jsonl\n"
+      "-rw-r--r-- 1 root root  98M Sep  1 14:53 gen-word-5100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 255K Sep  1 14:16 gen-word-515-count.jsonl\n"
+      "-rw-r--r-- 1 root root 259K Sep  1 14:53 gen-word-515-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 261K Sep  1 14:16 gen-word-520-count.jsonl\n"
+      "-rw-r--r-- 1 root root 260K Sep  1 14:53 gen-word-520-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  99M Sep  1 14:16 gen-word-5200-count.jsonl\n"
+      "-rw-r--r-- 1 root root  99M Sep  1 14:53 gen-word-5200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 259K Sep  1 14:16 gen-word-525-count.jsonl\n"
+      "-rw-r--r-- 1 root root 254K Sep  1 14:53 gen-word-525-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 263K Sep  1 14:16 gen-word-530-count.jsonl\n"
+      "-rw-r--r-- 1 root root 264K Sep  1 14:53 gen-word-530-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 101M Sep  1 14:16 gen-word-5300-count.jsonl\n"
+      "-rw-r--r-- 1 root root 101M Sep  1 14:53 gen-word-5300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 263K Sep  1 14:16 gen-word-535-count.jsonl\n"
+      "-rw-r--r-- 1 root root 262K Sep  1 14:53 gen-word-535-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 268K Sep  1 14:16 gen-word-540-count.jsonl\n"
+      "-rw-r--r-- 1 root root 267K Sep  1 14:53 gen-word-540-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 103M Sep  1 14:16 gen-word-5400-count.jsonl\n"
+      "-rw-r--r-- 1 root root 103M Sep  1 14:53 gen-word-5400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 270K Sep  1 14:16 gen-word-545-count.jsonl\n"
+      "-rw-r--r-- 1 root root 272K Sep  1 14:53 gen-word-545-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  32K Sep  1 14:16 gen-word-55-count.jsonl\n"
+      "-rw-r--r-- 1 root root  31K Sep  1 14:53 gen-word-55-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 271K Sep  1 14:16 gen-word-550-count.jsonl\n"
+      "-rw-r--r-- 1 root root 278K Sep  1 14:53 gen-word-550-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 105M Sep  1 14:16 gen-word-5500-count.jsonl\n"
+      "-rw-r--r-- 1 root root 105M Sep  1 14:53 gen-word-5500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 270K Sep  1 14:16 gen-word-555-count.jsonl\n"
+      "-rw-r--r-- 1 root root 276K Sep  1 14:53 gen-word-555-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 276K Sep  1 14:16 gen-word-560-count.jsonl\n"
+      "-rw-r--r-- 1 root root 278K Sep  1 14:53 gen-word-560-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 107M Sep  1 14:16 gen-word-5600-count.jsonl\n"
+      "-rw-r--r-- 1 root root 107M Sep  1 14:53 gen-word-5600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 282K Sep  1 14:16 gen-word-565-count.jsonl\n"
+      "-rw-r--r-- 1 root root 285K Sep  1 14:53 gen-word-565-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 278K Sep  1 14:16 gen-word-570-count.jsonl\n"
+      "-rw-r--r-- 1 root root 281K Sep  1 14:53 gen-word-570-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 109M Sep  1 14:16 gen-word-5700-count.jsonl\n"
+      "-rw-r--r-- 1 root root 109M Sep  1 14:53 gen-word-5700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 277K Sep  1 14:16 gen-word-575-count.jsonl\n"
+      "-rw-r--r-- 1 root root 289K Sep  1 14:53 gen-word-575-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 282K Sep  1 14:16 gen-word-580-count.jsonl\n"
+      "-rw-r--r-- 1 root root 286K Sep  1 14:53 gen-word-580-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 111M Sep  1 14:16 gen-word-5800-count.jsonl\n"
+      "-rw-r--r-- 1 root root 111M Sep  1 14:53 gen-word-5800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 288K Sep  1 14:16 gen-word-585-count.jsonl\n"
+      "-rw-r--r-- 1 root root 291K Sep  1 14:53 gen-word-585-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 291K Sep  1 14:16 gen-word-590-count.jsonl\n"
+      "-rw-r--r-- 1 root root 294K Sep  1 14:53 gen-word-590-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 113M Sep  1 14:16 gen-word-5900-count.jsonl\n"
+      "-rw-r--r-- 1 root root 113M Sep  1 14:53 gen-word-5900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 298K Sep  1 14:16 gen-word-595-count.jsonl\n"
+      "-rw-r--r-- 1 root root 295K Sep  1 14:53 gen-word-595-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  34K Sep  1 14:16 gen-word-60-count.jsonl\n"
+      "-rw-r--r-- 1 root root  34K Sep  1 14:53 gen-word-60-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 300K Sep  1 14:16 gen-word-600-count.jsonl\n"
+      "-rw-r--r-- 1 root root 298K Sep  1 14:53 gen-word-600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 115M Sep  1 14:16 gen-word-6000-count.jsonl\n"
+      "-rw-r--r-- 1 root root 115M Sep  1 14:53 gen-word-6000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 298K Sep  1 14:16 gen-word-605-count.jsonl\n"
+      "-rw-r--r-- 1 root root 302K Sep  1 14:53 gen-word-605-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 304K Sep  1 14:16 gen-word-610-count.jsonl\n"
+      "-rw-r--r-- 1 root root 306K Sep  1 14:53 gen-word-610-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 117M Sep  1 14:16 gen-word-6100-count.jsonl\n"
+      "-rw-r--r-- 1 root root 117M Sep  1 14:53 gen-word-6100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 306K Sep  1 14:16 gen-word-615-count.jsonl\n"
+      "-rw-r--r-- 1 root root 302K Sep  1 14:53 gen-word-615-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 303K Sep  1 14:16 gen-word-620-count.jsonl\n"
+      "-rw-r--r-- 1 root root 304K Sep  1 14:53 gen-word-620-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 118M Sep  1 14:16 gen-word-6200-count.jsonl\n"
+      "-rw-r--r-- 1 root root 118M Sep  1 14:53 gen-word-6200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 312K Sep  1 14:16 gen-word-625-count.jsonl\n"
+      "-rw-r--r-- 1 root root 313K Sep  1 14:53 gen-word-625-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 313K Sep  1 14:16 gen-word-630-count.jsonl\n"
+      "-rw-r--r-- 1 root root 311K Sep  1 14:53 gen-word-630-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 120M Sep  1 14:16 gen-word-6300-count.jsonl\n"
+      "-rw-r--r-- 1 root root 120M Sep  1 14:53 gen-word-6300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 312K Sep  1 14:16 gen-word-635-count.jsonl\n"
+      "-rw-r--r-- 1 root root 317K Sep  1 14:53 gen-word-635-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 322K Sep  1 14:16 gen-word-640-count.jsonl\n"
+      "-rw-r--r-- 1 root root 319K Sep  1 14:53 gen-word-640-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 122M Sep  1 14:16 gen-word-6400-count.jsonl\n"
+      "-rw-r--r-- 1 root root 122M Sep  1 14:53 gen-word-6400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 319K Sep  1 14:16 gen-word-645-count.jsonl\n"
+      "-rw-r--r-- 1 root root 320K Sep  1 14:53 gen-word-645-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  35K Sep  1 14:16 gen-word-65-count.jsonl\n"
+      "-rw-r--r-- 1 root root  37K Sep  1 14:53 gen-word-65-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 322K Sep  1 14:16 gen-word-650-count.jsonl\n"
+      "-rw-r--r-- 1 root root 320K Sep  1 14:53 gen-word-650-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 124M Sep  1 14:16 gen-word-6500-count.jsonl\n"
+      "-rw-r--r-- 1 root root 124M Sep  1 14:53 gen-word-6500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 321K Sep  1 14:16 gen-word-655-count.jsonl\n"
+      "-rw-r--r-- 1 root root 323K Sep  1 14:53 gen-word-655-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 333K Sep  1 14:16 gen-word-660-count.jsonl\n"
+      "-rw-r--r-- 1 root root 325K Sep  1 14:53 gen-word-660-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 126M Sep  1 14:16 gen-word-6600-count.jsonl\n"
+      "-rw-r--r-- 1 root root 126M Sep  1 14:53 gen-word-6600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 330K Sep  1 14:16 gen-word-665-count.jsonl\n"
+      "-rw-r--r-- 1 root root 327K Sep  1 14:53 gen-word-665-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 334K Sep  1 14:16 gen-word-670-count.jsonl\n"
+      "-rw-r--r-- 1 root root 332K Sep  1 14:53 gen-word-670-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 128M Sep  1 14:16 gen-word-6700-count.jsonl\n"
+      "-rw-r--r-- 1 root root 128M Sep  1 14:53 gen-word-6700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 333K Sep  1 14:16 gen-word-675-count.jsonl\n"
+      "-rw-r--r-- 1 root root 328K Sep  1 14:53 gen-word-675-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 337K Sep  1 14:16 gen-word-680-count.jsonl\n"
+      "-rw-r--r-- 1 root root 334K Sep  1 14:53 gen-word-680-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 130M Sep  1 14:16 gen-word-6800-count.jsonl\n"
+      "-rw-r--r-- 1 root root 130M Sep  1 14:53 gen-word-6800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 336K Sep  1 14:16 gen-word-685-count.jsonl\n"
+      "-rw-r--r-- 1 root root 338K Sep  1 14:53 gen-word-685-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 338K Sep  1 14:16 gen-word-690-count.jsonl\n"
+      "-rw-r--r-- 1 root root 343K Sep  1 14:53 gen-word-690-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 132M Sep  1 14:16 gen-word-6900-count.jsonl\n"
+      "-rw-r--r-- 1 root root 132M Sep  1 14:53 gen-word-6900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 336K Sep  1 14:16 gen-word-695-count.jsonl\n"
+      "-rw-r--r-- 1 root root 339K Sep  1 14:53 gen-word-695-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  40K Sep  1 14:16 gen-word-70-count.jsonl\n"
+      "-rw-r--r-- 1 root root  38K Sep  1 14:53 gen-word-70-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 351K Sep  1 14:16 gen-word-700-count.jsonl\n"
+      "-rw-r--r-- 1 root root 348K Sep  1 14:53 gen-word-700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 133M Sep  1 14:16 gen-word-7000-count.jsonl\n"
+      "-rw-r--r-- 1 root root 134M Sep  1 14:53 gen-word-7000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 347K Sep  1 14:16 gen-word-705-count.jsonl\n"
+      "-rw-r--r-- 1 root root 346K Sep  1 14:53 gen-word-705-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 346K Sep  1 14:16 gen-word-710-count.jsonl\n"
+      "-rw-r--r-- 1 root root 353K Sep  1 14:53 gen-word-710-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 135M Sep  1 14:16 gen-word-7100-count.jsonl\n"
+      "-rw-r--r-- 1 root root 136M Sep  1 14:53 gen-word-7100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 357K Sep  1 14:16 gen-word-715-count.jsonl\n"
+      "-rw-r--r-- 1 root root 344K Sep  1 14:53 gen-word-715-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 357K Sep  1 14:16 gen-word-720-count.jsonl\n"
+      "-rw-r--r-- 1 root root 353K Sep  1 14:53 gen-word-720-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 137M Sep  1 14:16 gen-word-7200-count.jsonl\n"
+      "-rw-r--r-- 1 root root 137M Sep  1 14:53 gen-word-7200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 358K Sep  1 14:16 gen-word-725-count.jsonl\n"
+      "-rw-r--r-- 1 root root 363K Sep  1 14:53 gen-word-725-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 362K Sep  1 14:16 gen-word-730-count.jsonl\n"
+      "-rw-r--r-- 1 root root 364K Sep  1 14:53 gen-word-730-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 139M Sep  1 14:16 gen-word-7300-count.jsonl\n"
+      "-rw-r--r-- 1 root root 139M Sep  1 14:53 gen-word-7300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 362K Sep  1 14:16 gen-word-735-count.jsonl\n"
+      "-rw-r--r-- 1 root root 367K Sep  1 14:53 gen-word-735-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 367K Sep  1 14:16 gen-word-740-count.jsonl\n"
+      "-rw-r--r-- 1 root root 368K Sep  1 14:53 gen-word-740-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 141M Sep  1 14:17 gen-word-7400-count.jsonl\n"
+      "-rw-r--r-- 1 root root 141M Sep  1 14:53 gen-word-7400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 373K Sep  1 14:16 gen-word-745-count.jsonl\n"
+      "-rw-r--r-- 1 root root 362K Sep  1 14:53 gen-word-745-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  42K Sep  1 14:16 gen-word-75-count.jsonl\n"
+      "-rw-r--r-- 1 root root  43K Sep  1 14:53 gen-word-75-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 364K Sep  1 14:16 gen-word-750-count.jsonl\n"
+      "-rw-r--r-- 1 root root 371K Sep  1 14:53 gen-word-750-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 143M Sep  1 14:17 gen-word-7500-count.jsonl\n"
+      "-rw-r--r-- 1 root root 143M Sep  1 14:53 gen-word-7500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 374K Sep  1 14:16 gen-word-755-count.jsonl\n"
+      "-rw-r--r-- 1 root root 371K Sep  1 14:53 gen-word-755-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 378K Sep  1 14:16 gen-word-760-count.jsonl\n"
+      "-rw-r--r-- 1 root root 379K Sep  1 14:53 gen-word-760-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 145M Sep  1 14:17 gen-word-7600-count.jsonl\n"
+      "-rw-r--r-- 1 root root 145M Sep  1 14:53 gen-word-7600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 378K Sep  1 14:16 gen-word-765-count.jsonl\n"
+      "-rw-r--r-- 1 root root 380K Sep  1 14:53 gen-word-765-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 381K Sep  1 14:16 gen-word-770-count.jsonl\n"
+      "-rw-r--r-- 1 root root 378K Sep  1 14:53 gen-word-770-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 147M Sep  1 14:17 gen-word-7700-count.jsonl\n"
+      "-rw-r--r-- 1 root root 147M Sep  1 14:53 gen-word-7700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 386K Sep  1 14:16 gen-word-775-count.jsonl\n"
+      "-rw-r--r-- 1 root root 379K Sep  1 14:53 gen-word-775-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 388K Sep  1 14:16 gen-word-780-count.jsonl\n"
+      "-rw-r--r-- 1 root root 389K Sep  1 14:53 gen-word-780-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 149M Sep  1 14:17 gen-word-7800-count.jsonl\n"
+      "-rw-r--r-- 1 root root 149M Sep  1 14:53 gen-word-7800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 389K Sep  1 14:16 gen-word-785-count.jsonl\n"
+      "-rw-r--r-- 1 root root 389K Sep  1 14:53 gen-word-785-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 384K Sep  1 14:16 gen-word-790-count.jsonl\n"
+      "-rw-r--r-- 1 root root 385K Sep  1 14:53 gen-word-790-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 151M Sep  1 14:17 gen-word-7900-count.jsonl\n"
+      "-rw-r--r-- 1 root root 151M Sep  1 14:53 gen-word-7900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 384K Sep  1 14:16 gen-word-795-count.jsonl\n"
+      "-rw-r--r-- 1 root root 386K Sep  1 14:53 gen-word-795-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  45K Sep  1 14:16 gen-word-80-count.jsonl\n"
+      "-rw-r--r-- 1 root root  43K Sep  1 14:53 gen-word-80-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 394K Sep  1 14:16 gen-word-800-count.jsonl\n"
+      "-rw-r--r-- 1 root root 388K Sep  1 14:53 gen-word-800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 153M Sep  1 14:17 gen-word-8000-count.jsonl\n"
+      "-rw-r--r-- 1 root root 153M Sep  1 14:53 gen-word-8000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 397K Sep  1 14:16 gen-word-805-count.jsonl\n"
+      "-rw-r--r-- 1 root root 389K Sep  1 14:53 gen-word-805-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 399K Sep  1 14:16 gen-word-810-count.jsonl\n"
+      "-rw-r--r-- 1 root root 399K Sep  1 14:53 gen-word-810-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 406K Sep  1 14:16 gen-word-815-count.jsonl\n"
+      "-rw-r--r-- 1 root root 403K Sep  1 14:53 gen-word-815-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 400K Sep  1 14:16 gen-word-820-count.jsonl\n"
+      "-rw-r--r-- 1 root root 404K Sep  1 14:53 gen-word-820-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 408K Sep  1 14:16 gen-word-825-count.jsonl\n"
+      "-rw-r--r-- 1 root root 406K Sep  1 14:53 gen-word-825-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 412K Sep  1 14:16 gen-word-830-count.jsonl\n"
+      "-rw-r--r-- 1 root root 408K Sep  1 14:53 gen-word-830-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 412K Sep  1 14:16 gen-word-835-count.jsonl\n"
+      "-rw-r--r-- 1 root root 410K Sep  1 14:53 gen-word-835-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 414K Sep  1 14:16 gen-word-840-count.jsonl\n"
+      "-rw-r--r-- 1 root root 411K Sep  1 14:53 gen-word-840-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 420K Sep  1 14:16 gen-word-845-count.jsonl\n"
+      "-rw-r--r-- 1 root root 416K Sep  1 14:53 gen-word-845-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  45K Sep  1 14:16 gen-word-85-count.jsonl\n"
+      "-rw-r--r-- 1 root root  46K Sep  1 14:53 gen-word-85-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 414K Sep  1 14:16 gen-word-850-count.jsonl\n"
+      "-rw-r--r-- 1 root root 423K Sep  1 14:53 gen-word-850-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 418K Sep  1 14:16 gen-word-855-count.jsonl\n"
+      "-rw-r--r-- 1 root root 424K Sep  1 14:53 gen-word-855-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 421K Sep  1 14:16 gen-word-860-count.jsonl\n"
+      "-rw-r--r-- 1 root root 427K Sep  1 14:53 gen-word-860-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 422K Sep  1 14:16 gen-word-865-count.jsonl\n"
+      "-rw-r--r-- 1 root root 417K Sep  1 14:53 gen-word-865-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 432K Sep  1 14:16 gen-word-870-count.jsonl\n"
+      "-rw-r--r-- 1 root root 420K Sep  1 14:53 gen-word-870-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 430K Sep  1 14:16 gen-word-875-count.jsonl\n"
+      "-rw-r--r-- 1 root root 431K Sep  1 14:53 gen-word-875-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 433K Sep  1 14:16 gen-word-880-count.jsonl\n"
+      "-rw-r--r-- 1 root root 434K Sep  1 14:53 gen-word-880-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 435K Sep  1 14:16 gen-word-885-count.jsonl\n"
+      "-rw-r--r-- 1 root root 437K Sep  1 14:53 gen-word-885-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 438K Sep  1 14:16 gen-word-890-count.jsonl\n"
+      "-rw-r--r-- 1 root root 434K Sep  1 14:53 gen-word-890-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 447K Sep  1 14:16 gen-word-895-count.jsonl\n"
+      "-rw-r--r-- 1 root root 442K Sep  1 14:53 gen-word-895-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  47K Sep  1 14:16 gen-word-90-count.jsonl\n"
+      "-rw-r--r-- 1 root root  52K Sep  1 14:53 gen-word-90-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 449K Sep  1 14:16 gen-word-900-count.jsonl\n"
+      "-rw-r--r-- 1 root root 440K Sep  1 14:53 gen-word-900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 449K Sep  1 14:16 gen-word-905-count.jsonl\n"
+      "-rw-r--r-- 1 root root 445K Sep  1 14:53 gen-word-905-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 448K Sep  1 14:16 gen-word-910-count.jsonl\n"
+      "-rw-r--r-- 1 root root 450K Sep  1 14:53 gen-word-910-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 445K Sep  1 14:16 gen-word-915-count.jsonl\n"
+      "-rw-r--r-- 1 root root 452K Sep  1 14:53 gen-word-915-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 456K Sep  1 14:16 gen-word-920-count.jsonl\n"
+      "-rw-r--r-- 1 root root 453K Sep  1 14:53 gen-word-920-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 457K Sep  1 14:16 gen-word-925-count.jsonl\n"
+      "-rw-r--r-- 1 root root 456K Sep  1 14:53 gen-word-925-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 454K Sep  1 14:16 gen-word-930-count.jsonl\n"
+      "-rw-r--r-- 1 root root 461K Sep  1 14:53 gen-word-930-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 465K Sep  1 14:16 gen-word-935-count.jsonl\n"
+      "-rw-r--r-- 1 root root 453K Sep  1 14:53 gen-word-935-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 464K Sep  1 14:16 gen-word-940-count.jsonl\n"
+      "-rw-r--r-- 1 root root 461K Sep  1 14:53 gen-word-940-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 460K Sep  1 14:16 gen-word-945-count.jsonl\n"
+      "-rw-r--r-- 1 root root 461K Sep  1 14:53 gen-word-945-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  51K Sep  1 14:16 gen-word-95-count.jsonl\n"
+      "-rw-r--r-- 1 root root  50K Sep  1 14:53 gen-word-95-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 475K Sep  1 14:16 gen-word-950-count.jsonl\n"
+      "-rw-r--r-- 1 root root 469K Sep  1 14:53 gen-word-950-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 475K Sep  1 14:16 gen-word-955-count.jsonl\n"
+      "-rw-r--r-- 1 root root 461K Sep  1 14:53 gen-word-955-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 484K Sep  1 14:16 gen-word-960-count.jsonl\n"
+      "-rw-r--r-- 1 root root 478K Sep  1 14:53 gen-word-960-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 477K Sep  1 14:16 gen-word-965-count.jsonl\n"
+      "-rw-r--r-- 1 root root 472K Sep  1 14:53 gen-word-965-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 484K Sep  1 14:16 gen-word-970-count.jsonl\n"
+      "-rw-r--r-- 1 root root 484K Sep  1 14:53 gen-word-970-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 482K Sep  1 14:16 gen-word-975-count.jsonl\n"
+      "-rw-r--r-- 1 root root 478K Sep  1 14:53 gen-word-975-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 488K Sep  1 14:16 gen-word-980-count.jsonl\n"
+      "-rw-r--r-- 1 root root 479K Sep  1 14:53 gen-word-980-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 489K Sep  1 14:16 gen-word-985-count.jsonl\n"
+      "-rw-r--r-- 1 root root 482K Sep  1 14:53 gen-word-985-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 486K Sep  1 14:16 gen-word-990-count.jsonl\n"
+      "-rw-r--r-- 1 root root 495K Sep  1 14:53 gen-word-990-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 483K Sep  1 14:16 gen-word-995-count.jsonl\n"
+      "-rw-r--r-- 1 root root 480K Sep  1 14:53 gen-word-995-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  56K Sep  1 14:16 shuffle-word-10-count.jsonl\n"
+      "-rw-r--r-- 1 root root  52K Sep  1 14:53 shuffle-word-10-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:16 shuffle-word-100-count.jsonl\n"
+      "-rw-r--r-- 1 root root  29K Sep  1 14:53 shuffle-word-100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-1000-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-1000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-105-count.jsonl\n"
+      "-rw-r--r-- 1 root root  29K Sep  1 14:53 shuffle-word-105-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  29K Sep  1 14:16 shuffle-word-110-count.jsonl\n"
+      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-110-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 522K Sep  1 14:16 shuffle-word-1100-count.jsonl\n"
+      "-rw-r--r-- 1 root root 519K Sep  1 14:53 shuffle-word-1100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:16 shuffle-word-115-count.jsonl\n"
+      "-rw-r--r-- 1 root root  29K Sep  1 14:53 shuffle-word-115-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:16 shuffle-word-120-count.jsonl\n"
+      "-rw-r--r-- 1 root root  29K Sep  1 14:53 shuffle-word-120-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 524K Sep  1 14:16 shuffle-word-1200-count.jsonl\n"
+      "-rw-r--r-- 1 root root 522K Sep  1 14:53 shuffle-word-1200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:16 shuffle-word-125-count.jsonl\n"
+      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-125-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:16 shuffle-word-130-count.jsonl\n"
+      "-rw-r--r-- 1 root root  29K Sep  1 14:53 shuffle-word-130-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 518K Sep  1 14:16 shuffle-word-1300-count.jsonl\n"
+      "-rw-r--r-- 1 root root 520K Sep  1 14:53 shuffle-word-1300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:16 shuffle-word-135-count.jsonl\n"
+      "-rw-r--r-- 1 root root  29K Sep  1 14:53 shuffle-word-135-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:16 shuffle-word-140-count.jsonl\n"
+      "-rw-r--r-- 1 root root  29K Sep  1 14:53 shuffle-word-140-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 521K Sep  1 14:16 shuffle-word-1400-count.jsonl\n"
+      "-rw-r--r-- 1 root root 521K Sep  1 14:53 shuffle-word-1400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  29K Sep  1 14:16 shuffle-word-145-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-145-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  38K Sep  1 14:16 shuffle-word-15-count.jsonl\n"
+      "-rw-r--r-- 1 root root  41K Sep  1 14:53 shuffle-word-15-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:16 shuffle-word-150-count.jsonl\n"
+      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-150-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 522K Sep  1 14:16 shuffle-word-1500-count.jsonl\n"
+      "-rw-r--r-- 1 root root 520K Sep  1 14:53 shuffle-word-1500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-155-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-155-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  29K Sep  1 14:16 shuffle-word-160-count.jsonl\n"
+      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-160-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 519K Sep  1 14:16 shuffle-word-1600-count.jsonl\n"
+      "-rw-r--r-- 1 root root 520K Sep  1 14:53 shuffle-word-1600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:16 shuffle-word-165-count.jsonl\n"
+      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-165-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-170-count.jsonl\n"
+      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-170-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 519K Sep  1 14:16 shuffle-word-1700-count.jsonl\n"
+      "-rw-r--r-- 1 root root 520K Sep  1 14:53 shuffle-word-1700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-175-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-175-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-180-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-180-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 518K Sep  1 14:16 shuffle-word-1800-count.jsonl\n"
+      "-rw-r--r-- 1 root root 518K Sep  1 14:53 shuffle-word-1800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:16 shuffle-word-185-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-185-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-190-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-190-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 522K Sep  1 14:16 shuffle-word-1900-count.jsonl\n"
+      "-rw-r--r-- 1 root root 521K Sep  1 14:53 shuffle-word-1900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:16 shuffle-word-195-count.jsonl\n"
+      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-195-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  38K Sep  1 14:16 shuffle-word-20-count.jsonl\n"
+      "-rw-r--r-- 1 root root  39K Sep  1 14:53 shuffle-word-20-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-200-count.jsonl\n"
+      "-rw-r--r-- 1 root root  29K Sep  1 14:53 shuffle-word-200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 519K Sep  1 14:16 shuffle-word-2000-count.jsonl\n"
+      "-rw-r--r-- 1 root root 521K Sep  1 14:53 shuffle-word-2000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-205-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-205-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-210-count.jsonl\n"
+      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-210-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 519K Sep  1 14:16 shuffle-word-2100-count.jsonl\n"
+      "-rw-r--r-- 1 root root 520K Sep  1 14:53 shuffle-word-2100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-215-count.jsonl\n"
+      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-215-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-220-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-220-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 520K Sep  1 14:16 shuffle-word-2200-count.jsonl\n"
+      "-rw-r--r-- 1 root root 521K Sep  1 14:53 shuffle-word-2200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-225-count.jsonl\n"
+      "-rw-r--r-- 1 root root  29K Sep  1 14:53 shuffle-word-225-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-230-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-230-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 521K Sep  1 14:16 shuffle-word-2300-count.jsonl\n"
+      "-rw-r--r-- 1 root root 521K Sep  1 14:53 shuffle-word-2300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-235-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-235-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-240-count.jsonl\n"
+      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-240-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 520K Sep  1 14:16 shuffle-word-2400-count.jsonl\n"
+      "-rw-r--r-- 1 root root 519K Sep  1 14:53 shuffle-word-2400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-245-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-245-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  38K Sep  1 14:16 shuffle-word-25-count.jsonl\n"
+      "-rw-r--r-- 1 root root  37K Sep  1 14:53 shuffle-word-25-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-250-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-250-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 518K Sep  1 14:16 shuffle-word-2500-count.jsonl\n"
+      "-rw-r--r-- 1 root root 519K Sep  1 14:53 shuffle-word-2500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-255-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-255-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:16 shuffle-word-260-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-260-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 515K Sep  1 14:16 shuffle-word-2600-count.jsonl\n"
+      "-rw-r--r-- 1 root root 516K Sep  1 14:53 shuffle-word-2600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:16 shuffle-word-265-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-265-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:16 shuffle-word-270-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-270-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 510K Sep  1 14:16 shuffle-word-2700-count.jsonl\n"
+      "-rw-r--r-- 1 root root 511K Sep  1 14:53 shuffle-word-2700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-275-count.jsonl\n"
+      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-275-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-280-count.jsonl\n"
+      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-280-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 510K Sep  1 14:16 shuffle-word-2800-count.jsonl\n"
+      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-2800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-285-count.jsonl\n"
+      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-285-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-290-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-290-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-2900-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-2900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-295-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-295-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  36K Sep  1 14:16 shuffle-word-30-count.jsonl\n"
+      "-rw-r--r-- 1 root root  35K Sep  1 14:53 shuffle-word-30-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-300-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-3000-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-3000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-305-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-305-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-310-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-310-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-3100-count.jsonl\n"
+      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-3100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-315-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-315-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-320-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-320-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-3200-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-3200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-325-count.jsonl\n"
+      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-325-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-330-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-330-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-3300-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-3300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-335-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-335-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-340-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-340-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-3400-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-3400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-345-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-345-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  33K Sep  1 14:16 shuffle-word-35-count.jsonl\n"
+      "-rw-r--r-- 1 root root  33K Sep  1 14:53 shuffle-word-35-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-350-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-350-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-3500-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-3500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-355-count.jsonl\n"
+      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-355-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-360-count.jsonl\n"
+      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-360-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-3600-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-3600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-365-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-365-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-370-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-370-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep  1 14:16 shuffle-word-3700-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-3700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-375-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-375-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:16 shuffle-word-380-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-380-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-3800-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-3800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-385-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-385-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-390-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-390-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-3900-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-3900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-395-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-395-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  31K Sep  1 14:16 shuffle-word-40-count.jsonl\n"
+      "-rw-r--r-- 1 root root  31K Sep  1 14:53 shuffle-word-40-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-400-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-4000-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-4000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-405-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-405-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-410-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-410-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-4100-count.jsonl\n"
+      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-4100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-415-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-415-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-420-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-420-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep  1 14:16 shuffle-word-4200-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-4200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-425-count.jsonl\n"
+      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-425-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-430-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-430-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-4300-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-4300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-435-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-435-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-440-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-440-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-4400-count.jsonl\n"
+      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-4400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-445-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-445-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  31K Sep  1 14:16 shuffle-word-45-count.jsonl\n"
+      "-rw-r--r-- 1 root root  34K Sep  1 14:53 shuffle-word-45-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-450-count.jsonl\n"
+      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-450-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-4500-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-4500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-455-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-455-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-460-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-460-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-4600-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-4600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-465-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-465-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-470-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-470-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-4700-count.jsonl\n"
+      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-4700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-475-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-475-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-480-count.jsonl\n"
+      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-480-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-4800-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-4800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-485-count.jsonl\n"
+      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-485-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-490-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-490-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-4900-count.jsonl\n"
+      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-4900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-495-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-495-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  84K Sep  1 14:16 shuffle-word-5-count.jsonl\n"
+      "-rw-r--r-- 1 root root  81K Sep  1 14:53 shuffle-word-5-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  31K Sep  1 14:16 shuffle-word-50-count.jsonl\n"
+      "-rw-r--r-- 1 root root  31K Sep  1 14:53 shuffle-word-50-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-500-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-5000-count.jsonl\n"
+      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-5000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-505-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-505-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-510-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-510-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-5100-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-5100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-515-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-515-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-520-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-520-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-5200-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-5200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-525-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-525-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-530-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-530-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-5300-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-5300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-535-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-535-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-540-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-540-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-5400-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-5400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-545-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-545-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  30K Sep  1 14:16 shuffle-word-55-count.jsonl\n"
+      "-rw-r--r-- 1 root root  30K Sep  1 14:53 shuffle-word-55-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:16 shuffle-word-550-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-550-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-5500-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-5500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:16 shuffle-word-555-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-555-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-560-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-560-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-5600-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-5600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-565-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-565-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-570-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-570-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-5700-count.jsonl\n"
+      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-5700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-575-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-575-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-580-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-580-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep  1 14:16 shuffle-word-5800-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-5800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-585-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-585-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:16 shuffle-word-590-count.jsonl\n"
+      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-590-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-5900-count.jsonl\n"
+      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-5900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-595-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-595-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  32K Sep  1 14:16 shuffle-word-60-count.jsonl\n"
+      "-rw-r--r-- 1 root root  29K Sep  1 14:53 shuffle-word-60-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-600-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep  1 14:16 shuffle-word-6000-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-6000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-605-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-605-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-610-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-610-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep  1 14:16 shuffle-word-6100-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-6100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-615-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-615-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-620-count.jsonl\n"
+      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-620-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep  1 14:16 shuffle-word-6200-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-6200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-625-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-625-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-630-count.jsonl\n"
+      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-630-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-6300-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-6300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-635-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-635-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-640-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-640-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-6400-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-6400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:16 shuffle-word-645-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-645-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  30K Sep  1 14:16 shuffle-word-65-count.jsonl\n"
+      "-rw-r--r-- 1 root root  30K Sep  1 14:53 shuffle-word-65-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-650-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-650-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-6500-count.jsonl\n"
+      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-6500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-655-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-655-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-660-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-660-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-6600-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-6600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-665-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-665-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-670-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-670-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep  1 14:16 shuffle-word-6700-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-6700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-675-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-675-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-680-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-680-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep  1 14:16 shuffle-word-6800-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-6800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-685-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-685-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-690-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-690-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-6900-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-6900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-695-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-695-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  30K Sep  1 14:16 shuffle-word-70-count.jsonl\n"
+      "-rw-r--r-- 1 root root  30K Sep  1 14:53 shuffle-word-70-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-700-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep  1 14:16 shuffle-word-7000-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-7000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-705-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-705-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-710-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-710-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-7100-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-7100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-715-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-715-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-720-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-720-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-7200-count.jsonl\n"
+      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-7200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-725-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-725-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-730-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-730-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep  1 14:16 shuffle-word-7300-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-7300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:16 shuffle-word-735-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-735-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-740-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-740-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-7400-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-7400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-745-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-745-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  30K Sep  1 14:16 shuffle-word-75-count.jsonl\n"
+      "-rw-r--r-- 1 root root  30K Sep  1 14:53 shuffle-word-75-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-750-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-750-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 508K Sep  1 14:16 shuffle-word-7500-count.jsonl\n"
+      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-7500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-755-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-755-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-760-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-760-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-7600-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-7600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-765-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-765-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-770-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-770-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-7700-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-7700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-775-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-775-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-780-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-780-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-7800-count.jsonl\n"
+      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-7800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-785-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-785-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-790-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-790-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-7900-count.jsonl\n"
+      "-rw-r--r-- 1 root root 508K Sep  1 14:53 shuffle-word-7900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-795-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-795-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  30K Sep  1 14:16 shuffle-word-80-count.jsonl\n"
+      "-rw-r--r-- 1 root root  30K Sep  1 14:53 shuffle-word-80-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-800-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 509K Sep  1 14:16 shuffle-word-8000-count.jsonl\n"
+      "-rw-r--r-- 1 root root 509K Sep  1 14:53 shuffle-word-8000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-805-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-805-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-810-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-810-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-815-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-815-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-820-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-820-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-825-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-825-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-830-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-830-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-835-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-835-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-840-count.jsonl\n"
+      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-840-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-845-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-845-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  28K Sep  1 14:16 shuffle-word-85-count.jsonl\n"
+      "-rw-r--r-- 1 root root  30K Sep  1 14:53 shuffle-word-85-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-850-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-850-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-855-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-855-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-860-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-860-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-865-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-865-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-870-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-870-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-875-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-875-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-880-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-880-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-885-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-885-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-890-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-890-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-895-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-895-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-90-count.jsonl\n"
+      "-rw-r--r-- 1 root root  29K Sep  1 14:53 shuffle-word-90-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-900-count.jsonl\n"
+      "-rw-r--r-- 1 root root  28K Sep  1 14:53 shuffle-word-900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-905-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-905-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-910-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-910-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-915-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-915-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-920-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-920-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-925-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-925-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-930-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-930-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-935-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-935-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-940-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-940-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-945-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-945-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  29K Sep  1 14:16 shuffle-word-95-count.jsonl\n"
+      "-rw-r--r-- 1 root root  29K Sep  1 14:53 shuffle-word-95-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-950-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-950-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-955-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-955-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-960-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-960-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-965-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-965-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-970-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-970-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-975-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-975-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-980-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-980-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-985-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-985-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  26K Sep  1 14:16 shuffle-word-990-count.jsonl\n"
+      "-rw-r--r-- 1 root root  27K Sep  1 14:53 shuffle-word-990-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root  27K Sep  1 14:16 shuffle-word-995-count.jsonl\n"
+      "-rw-r--r-- 1 root root  26K Sep  1 14:53 shuffle-word-995-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 6.0K Sep  1 14:16 word-2-count.jsonl\n"
+      "-rw-r--r-- 1 root root 6.0K Sep  1 14:53 word-2-count.jsonl\n"
      ]
     }
    ],
@@ -7935,19 +7941,19 @@
   {
    "cell_type": "code",
    "execution_count": 6,
-   "id": "ff7c80c3",
+   "id": "ead7aedd",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-01T14:17:00.952374Z",
-     "iopub.status.busy": "2023-09-01T14:17:00.951720Z",
-     "iopub.status.idle": "2023-09-01T14:17:17.413235Z",
-     "shell.execute_reply": "2023-09-01T14:17:17.412289Z"
+     "iopub.execute_input": "2023-09-01T14:53:18.175195Z",
+     "iopub.status.busy": "2023-09-01T14:53:18.175015Z",
+     "iopub.status.idle": "2023-09-01T14:53:38.004962Z",
+     "shell.execute_reply": "2023-09-01T14:53:38.004183Z"
     },
     "papermill": {
-     "duration": 16.58971,
-     "end_time": "2023-09-01T14:17:17.415413",
+     "duration": 19.943177,
+     "end_time": "2023-09-01T14:53:38.006545",
      "exception": false,
-     "start_time": "2023-09-01T14:17:00.825703",
+     "start_time": "2023-09-01T14:53:18.063368",
      "status": "completed"
     },
     "tags": []
@@ -7979,9 +7985,9 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "/usr/local/lib/python3.11/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 1825379876\r\n",
+      "/usr/local/lib/python3.11/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 3284035444\r\n",
       "  rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n",
-      "Global seed set to 1825379876\r\n"
+      "Global seed set to 3284035444\r\n"
      ]
     },
     {
@@ -7996,11 +8002,11 @@
      "output_type": "stream",
      "text": [
       "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.9\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20230901_141706-f27s10fr\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20230901_145324-1c6n4316\u001b[0m\r\n",
       "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n",
       "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mv5-L96-D1024-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)\u001b[0m\r\n",
       "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/f27s10fr\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/1c6n4316\u001b[0m\r\n",
       "Traceback (most recent call last):\r\n",
       "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 254, in <module>\r\n",
       "    cli_main()\r\n",
@@ -8035,10 +8041,10 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5-L96-D1024-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/f27s10fr\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5-L96-D1024-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/1c6n4316\u001b[0m\r\n",
       "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjkzMjg5ODA3/version_details/v17\u001b[0m\r\n",
       "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230901_141706-f27s10fr/logs\u001b[0m\r\n"
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230901_145324-1c6n4316/logs\u001b[0m\r\n"
      ]
     }
    ],
@@ -8064,19 +8070,19 @@
   {
    "cell_type": "code",
    "execution_count": 7,
-   "id": "42dd5902",
+   "id": "7ac5ad40",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-01T14:17:17.525285Z",
-     "iopub.status.busy": "2023-09-01T14:17:17.525085Z",
-     "iopub.status.idle": "2023-09-01T14:17:20.305484Z",
-     "shell.execute_reply": "2023-09-01T14:17:20.304596Z"
+     "iopub.execute_input": "2023-09-01T14:53:38.115546Z",
+     "iopub.status.busy": "2023-09-01T14:53:38.115356Z",
+     "iopub.status.idle": "2023-09-01T14:53:40.987634Z",
+     "shell.execute_reply": "2023-09-01T14:53:40.986824Z"
     },
     "papermill": {
-     "duration": 2.836752,
-     "end_time": "2023-09-01T14:17:20.307234",
+     "duration": 2.928097,
+     "end_time": "2023-09-01T14:53:40.989318",
      "exception": false,
-     "start_time": "2023-09-01T14:17:17.470482",
+     "start_time": "2023-09-01T14:53:38.061221",
      "status": "completed"
     },
     "tags": []
@@ -8124,19 +8130,19 @@
   {
    "cell_type": "code",
    "execution_count": 8,
-   "id": "af268d4b",
+   "id": "912275ec",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-01T14:17:20.419495Z",
-     "iopub.status.busy": "2023-09-01T14:17:20.419297Z",
-     "iopub.status.idle": "2023-09-01T14:17:25.522948Z",
-     "shell.execute_reply": "2023-09-01T14:17:25.522108Z"
+     "iopub.execute_input": "2023-09-01T14:53:41.101106Z",
+     "iopub.status.busy": "2023-09-01T14:53:41.100914Z",
+     "iopub.status.idle": "2023-09-01T14:53:46.113705Z",
+     "shell.execute_reply": "2023-09-01T14:53:46.112987Z"
     },
     "papermill": {
-     "duration": 5.162462,
-     "end_time": "2023-09-01T14:17:25.524710",
+     "duration": 5.068771,
+     "end_time": "2023-09-01T14:53:46.115376",
      "exception": false,
-     "start_time": "2023-09-01T14:17:20.362248",
+     "start_time": "2023-09-01T14:53:41.046605",
      "status": "completed"
     },
     "tags": []
@@ -8153,13 +8159,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
       "Traceback (most recent call last):\r\n",
       "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-memory/../memory_script/eval_v5_memory_guided.py\", line 366, in <module>\r\n",
       "    asyncio.run(main_function())\r\n",
@@ -8193,19 +8193,19 @@
   {
    "cell_type": "code",
    "execution_count": 9,
-   "id": "ca160419",
+   "id": "71ea6284",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-01T14:17:25.641137Z",
-     "iopub.status.busy": "2023-09-01T14:17:25.640946Z",
-     "iopub.status.idle": "2023-09-01T14:17:30.508866Z",
-     "shell.execute_reply": "2023-09-01T14:17:30.507986Z"
+     "iopub.execute_input": "2023-09-01T14:53:46.225301Z",
+     "iopub.status.busy": "2023-09-01T14:53:46.225111Z",
+     "iopub.status.idle": "2023-09-01T14:53:51.386811Z",
+     "shell.execute_reply": "2023-09-01T14:53:51.386023Z"
     },
     "papermill": {
-     "duration": 4.92775,
-     "end_time": "2023-09-01T14:17:30.510674",
+     "duration": 5.217798,
+     "end_time": "2023-09-01T14:53:51.388460",
      "exception": false,
-     "start_time": "2023-09-01T14:17:25.582924",
+     "start_time": "2023-09-01T14:53:46.170662",
      "status": "completed"
     },
     "tags": []
@@ -8273,14 +8273,14 @@
   },
   "papermill": {
    "default_parameters": {},
-   "duration": 42.41759,
-   "end_time": "2023-09-01T14:17:30.786269",
+   "duration": 46.4696,
+   "end_time": "2023-09-01T14:53:51.661120",
    "environment_variables": {},
    "exception": null,
    "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E1e-1-ctx4k-part5.ipynb",
    "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E1e-1-ctx4k-part5.ipynb",
    "parameters": {},
-   "start_time": "2023-09-01T14:16:48.368679",
+   "start_time": "2023-09-01T14:53:05.191520",
    "version": "2.4.0"
   }
  },