diff --git "a/experiment/rwkv-x-exp/v5-headsize2x/v5-L6-D4096-E1e-1-ctx4k-part2.ipynb" "b/experiment/rwkv-x-exp/v5-headsize2x/v5-L6-D4096-E1e-1-ctx4k-part2.ipynb"
new file mode 100644--- /dev/null
+++ "b/experiment/rwkv-x-exp/v5-headsize2x/v5-L6-D4096-E1e-1-ctx4k-part2.ipynb"
@@ -0,0 +1,47302 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "ec63c8af",
+   "metadata": {
+    "papermill": {
+     "duration": 0.005577,
+     "end_time": "2023-08-24T02:33:40.989678",
+     "exception": false,
+     "start_time": "2023-08-24T02:33:40.984101",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "# RWKV v5-slim / embedding init-range 1e-01 / 4k\n",
+    "\n",
+    "- 6 layers\n",
+    "- 4096 embedding size\n",
+    "\n",
+    "Goes through the modified memory training for v5 models, across various embedding init ranges.\n",
+    "\n",
+    "**Note:** This project assumes you have the rwkv-infctx conda env set up"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "46d4e8e9",
+   "metadata": {
+    "papermill": {
+     "duration": 0.003878,
+     "end_time": "2023-08-24T02:33:40.998538",
+     "exception": false,
+     "start_time": "2023-08-24T02:33:40.994660",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "# Basic Setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "ee0e4566",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:33:41.007831Z",
+     "iopub.status.busy": "2023-08-24T02:33:41.007560Z",
+     "iopub.status.idle": "2023-08-24T02:33:41.018442Z",
+     "shell.execute_reply": "2023-08-24T02:33:41.017681Z"
+    },
+    "papermill": {
+     "duration": 0.017264,
+     "end_time": "2023-08-24T02:33:41.019739",
+     "exception": false,
+     "start_time": "2023-08-24T02:33:41.002475",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "DEEPSPEED_STRAT: deepspeed_stage_2_offload\n",
+      "ENABLE_WANDB: True\n",
+      "GPU_DEVICES: auto\n",
+      "NOTEBOOK_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x\n",
+      "INFERENCE_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x\n",
+      "TRAINER_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x\n",
+      "PROJECT_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "\n",
+    "# --- Training / logging switches ---\n",
+    "DEEPSPEED_STRAT=\"deepspeed_stage_2_offload\"\n",
+    "GPU_DEVICES=\"auto\"\n",
+    "ENABLE_WANDB=True\n",
+    "# wandb mode handed to the trainer via the WANDB_MODE environment variable\n",
+    "WANDB_MODE=\"online\" if ENABLE_WANDB else \"disabled\"\n",
+    "\n",
+    "# Exported as an environment variable to the trainer / eval commands in later cells\n",
+    "RWKV_WAVENET_LAYERS=1\n",
+    "\n",
+    "# --- Model shape and embedding init scale ---\n",
+    "LAYER_COUNT=6\n",
+    "EMBED_DIM=4096\n",
+    "EMBED_SCALE=0.1\n",
+    "# Filename-safe form of the scale: the \".\" is replaced by \"_\" (0.1 -> \"0_1\")\n",
+    "EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(\".\", \"_\")\n",
+    "\n",
+    "# Run-name prefix (keeps the raw scale) and file-name prefix (uses the safe label)\n",
+    "WANDB_PREFIX=f\"v5-hs2x-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE}\"\n",
+    "FILENAME_PREFIX=f\"v5-hs2x-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE_LABEL}\"\n",
+    "\n",
+    "# --- Paths ---\n",
+    "# \"__file__\" is a literal string here, so abspath() resolves it against the current\n",
+    "# working directory and NOTEBOOK_DIR ends up being that working directory.\n",
+    "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n",
+    "PROJECT_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../../../../\"))\n",
+    "# Trainer and inference code both live in the same directory for this experiment\n",
+    "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5headsize2x/\"))\n",
+    "INFERENCE_DIR=TRAINER_DIR\n",
+    "\n",
+    "# Echo the effective configuration\n",
+    "for label, value in (\n",
+    "    (\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT),\n",
+    "    (\"ENABLE_WANDB:\", ENABLE_WANDB),\n",
+    "    (\"GPU_DEVICES:\", GPU_DEVICES),\n",
+    "    (\"NOTEBOOK_DIR:\", NOTEBOOK_DIR),\n",
+    "    (\"INFERENCE_DIR:\", INFERENCE_DIR),\n",
+    "    (\"TRAINER_DIR:\", TRAINER_DIR),\n",
+    "    (\"PROJECT_DIR:\", PROJECT_DIR),\n",
+    "):\n",
+    "    print(label, value)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d5374526",
+   "metadata": {
+    "papermill": {
+     "duration": 0.003883,
+     "end_time": "2023-08-24T02:33:41.027801",
+     "exception": false,
+     "start_time": "2023-08-24T02:33:41.023918",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "## Tune 1 : Simple Memory instruct finetuning\n",
+    "\n",
+    "- Tune 1: Low ctx size (512), Training with only the input masked. This does very limited memory training, and is used primarily to train the instruction set."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "80465c01",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:33:41.037972Z",
+     "iopub.status.busy": "2023-08-24T02:33:41.037800Z",
+     "iopub.status.idle": "2023-08-24T02:33:41.282933Z",
+     "shell.execute_reply": "2023-08-24T02:33:41.282295Z"
+    },
+    "papermill": {
+     "duration": 0.250953,
+     "end_time": "2023-08-24T02:33:41.284090",
+     "exception": false,
+     "start_time": "2023-08-24T02:33:41.033137",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "## Generating word reptition dataset ##\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 10 max words, 2500 samples - at ../dataset/word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 15 max words, 2500 samples - at ../dataset/word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2 max words, 5000 samples - at ../dataset/word-2-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 25 max words, 2500 samples - at ../dataset/word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5 max words, 5000 samples - at ../dataset/word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 40 max words, 2500 samples - at ../dataset/word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 50 max words, 2500 samples - at ../dataset/word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 20 max words, 2500 samples - at ../dataset/word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 80 max words, 2500 samples - at ../dataset/word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 80 max words, 2500 samples - at ../dataset/word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 100 max words, 2500 samples - at ../dataset/word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 200 max words, 2500 samples - at ../dataset/word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "## Done ##\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "total 21M\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "drwxr-xr-x  2 root root 4.0K Aug 24 02:33 .\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "drwxr-xr-x 11 root root  230 Aug 24 02:33 ..\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 612K Aug 24 02:33 word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 2.8M Aug 24 02:33 word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 726K Aug 24 02:33 word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 838K Aug 24 02:33 word-2-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 856K Aug 24 02:33 word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 5.2M Aug 24 02:33 word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 966K Aug 24 02:33 word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 1.4M Aug 24 02:33 word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 967K Aug 24 02:33 word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 1.6M Aug 24 02:33 word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 2.3M Aug 24 02:33 word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 2.3M Aug 24 02:33 word-80-count.jsonl\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%script bash\n",
+    "\n",
+    "########################################\n",
+    "# Generate the required jsonl dataset\n",
+    "########################################\n",
+    "\n",
+    "# Reset the dataset dir (only previously generated *.jsonl files are removed)\n",
+    "mkdir -p ../dataset\n",
+    "rm -rf ../dataset/*.jsonl\n",
+    "\n",
+    "# Generate the various datasets\n",
+    "echo \"## Generating word reptition dataset ##\"\n",
+    "\n",
+    "# Arguments to gen_limited_segmented_jsonl.py, as used below:\n",
+    "#   <output jsonl path> <max word count> <number of samples>\n",
+    "# Every generator is started as a background job (&) and joined by `wait` below.\n",
+    "#\n",
+    "# The mix is strongly biased towards small word counts, to teach the concept from\n",
+    "# scratch so that the model can learn the function.\n",
+    "#\n",
+    "# Note that each document sample has its length randomized between half of the\n",
+    "# target word count and the target word count.\n",
+    "python ../memory_script/gen_limited_segmented_jsonl.py ../dataset/word-2-count.jsonl  2  5000 &\n",
+    "python ../memory_script/gen_limited_segmented_jsonl.py ../dataset/word-5-count.jsonl  5  5000 &\n",
+    "python ../memory_script/gen_limited_segmented_jsonl.py ../dataset/word-10-count.jsonl 10 2500 &\n",
+    "python ../memory_script/gen_limited_segmented_jsonl.py ../dataset/word-15-count.jsonl 15 2500 &\n",
+    "python ../memory_script/gen_limited_segmented_jsonl.py ../dataset/word-20-count.jsonl 20 2500 &\n",
+    "python ../memory_script/gen_limited_segmented_jsonl.py ../dataset/word-25-count.jsonl 25 2500 &\n",
+    "python ../memory_script/gen_limited_segmented_jsonl.py ../dataset/word-40-count.jsonl 40 2500 &\n",
+    "python ../memory_script/gen_limited_segmented_jsonl.py ../dataset/word-50-count.jsonl 50 2500 &\n",
+    "# The max word count must match the file name (this line previously passed 80)\n",
+    "python ../memory_script/gen_limited_segmented_jsonl.py ../dataset/word-60-count.jsonl 60 2500 &\n",
+    "python ../memory_script/gen_limited_segmented_jsonl.py ../dataset/word-80-count.jsonl 80 2500 &\n",
+    "\n",
+    "# With a slight mix of the larger word count\n",
+    "python ../memory_script/gen_limited_segmented_jsonl.py ../dataset/word-100-count.jsonl 100 2500 &\n",
+    "python ../memory_script/gen_limited_segmented_jsonl.py ../dataset/word-200-count.jsonl 200 2500 &\n",
+    "\n",
+    "# Block until every background generator has finished before listing the results\n",
+    "wait\n",
+    "echo \"## Done ##\"\n",
+    "\n",
+    "ls -alh ../dataset/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "5c92235b",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:33:41.296301Z",
+     "iopub.status.busy": "2023-08-24T02:33:41.296136Z",
+     "iopub.status.idle": "2023-08-24T02:33:58.272604Z",
+     "shell.execute_reply": "2023-08-24T02:33:58.271521Z"
+    },
+    "papermill": {
+     "duration": 16.984702,
+     "end_time": "2023-08-24T02:33:58.274744",
+     "exception": false,
+     "start_time": "2023-08-24T02:33:41.290042",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.11/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/v5base-mem-instruct.yaml', '--trainer.logger.init_args.name=v5-hs2x-L6-D4096-E0.1 - Mem-Instruct (train-ctx=512, deepspeed_stage_2_offload)', '--trainer.strategy=deepspeed_stage_2_offload', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-hs2x-L6-D4096-E0_1-mem-instruct/', '--model.load_model=../model/v5-hs2x-L6-D4096-E0_1-enwiki-instruct.pth', '--model.ctx_len=512', '--model.bptt_learning_range=1'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/v5base-mem-instruct.yaml', '--trainer.logger.init_args.name=v5-hs2x-L6-D4096-E0.1 - Mem-Instruct (train-ctx=512, deepspeed_stage_2_offload)', '--trainer.strategy=deepspeed_stage_2_offload', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-hs2x-L6-D4096-E0_1-mem-instruct/', '--model.load_model=../model/v5-hs2x-L6-D4096-E0_1-enwiki-instruct.pth', '--model.ctx_len=512', '--model.bptt_learning_range=1'].\r\n",
+      "  rank_zero_warn(\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.11/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 2114940700\r\n",
+      "  rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n",
+      "Global seed set to 2114940700\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: wandb version 0.15.8 is available!  To upgrade, please run:\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m:  $ pip install wandb --upgrade\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.4\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20230824_023347-cpumqksp\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mv5-hs2x-L6-D4096-E0.1 - Mem-Instruct (train-ctx=512, deepspeed_stage_2_offload)\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/cpumqksp\u001b[0m\r\n",
+      "Traceback (most recent call last):\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/lightning_trainer.py\", line 254, in <module>\r\n",
+      "    cli_main()\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/lightning_trainer.py\", line 233, in cli_main\r\n",
+      "    LightningCLI(\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/lightning/pytorch/cli.py\", line 350, in __init__\r\n",
+      "    self.instantiate_classes()\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/lightning/pytorch/cli.py\", line 499, in instantiate_classes\r\n",
+      "    self.config_init = self.parser.instantiate_classes(self.config)\r\n",
+      "                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n",
+      "    cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n",
+      "          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_core.py\", line 1128, in instantiate_classes\r\n",
+      "    cfg[subcommand] = subparser.instantiate_classes(cfg[subcommand], instantiate_groups=instantiate_groups)\r\n",
+      "                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n",
+      "    cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n",
+      "          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_core.py\", line 1122, in instantiate_classes\r\n",
+      "    component.instantiate_class(component, cfg)\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_signatures.py\", line 551, in group_instantiate_class\r\n",
+      "    parent[key] = group.group_class(**value)\r\n",
+      "                  ^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/src/model.py\", line 553, in __init__\r\n",
+      "    raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n",
+      "ValueError: load_model file '../model/v5-hs2x-L6-D4096-E0_1-enwiki-instruct.pth' does not exist\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5-hs2x-L6-D4096-E0.1 - Mem-Instruct (train-ctx=512, deepspeed_stage_2_offload)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/cpumqksp\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230824_023347-cpumqksp/logs\u001b[0m\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Start the finetune model training\n",
+    "!cd \"{TRAINER_DIR}\" && \\\n",
+    "    export WANDB_MODE=\"{WANDB_MODE}\" && \\\n",
+    "    export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
+    "    python lightning_trainer.py fit \\\n",
+    "        -c \"{NOTEBOOK_DIR}/v5base-mem-instruct.yaml\" \\\n",
+    "        --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Instruct (train-ctx=512, {DEEPSPEED_STRAT})\" \\\n",
+    "        --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n",
+    "        --trainer.devices=\"{GPU_DEVICES}\" \\\n",
+    "        --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-mem-instruct/\" \\\n",
+    "        --model.load_model=\"../model/{FILENAME_PREFIX}-enwiki-instruct.pth\" \\\n",
+    "        --model.ctx_len=512 \\\n",
+    "        --model.bptt_learning_range=1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "6d1b4827",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:33:58.289091Z",
+     "iopub.status.busy": "2023-08-24T02:33:58.288785Z",
+     "iopub.status.idle": "2023-08-24T02:34:00.988652Z",
+     "shell.execute_reply": "2023-08-24T02:34:00.987785Z"
+    },
+    "papermill": {
+     "duration": 2.708793,
+     "end_time": "2023-08-24T02:34:00.990395",
+     "exception": false,
+     "start_time": "2023-08-24T02:33:58.281602",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Traceback (most recent call last):\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/export_checkpoint.py\", line 651, in <module>\r\n",
+      "    convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n",
+      "    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n",
+      "                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n",
+      "    raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n",
+      "ValueError: Unable to find 'latest' file at ../checkpoint/v5-hs2x-L6-D4096-E0_1-mem-instruct/last.ckpt/latest\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ls: cannot access '../model/v5-hs2x-L6-D4096-E0_1-mem-instruct.pth': No such file or directory\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Lets export the model from the checkpoint\n",
+    "!cd \"{TRAINER_DIR}\" && \\\n",
+    "    python export_checkpoint.py \\\n",
+    "        \"../checkpoint/{FILENAME_PREFIX}-mem-instruct/last.ckpt\" \\\n",
+    "        \"../model/{FILENAME_PREFIX}-mem-instruct.pth\" \"bf16\"\n",
+    "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-mem-instruct.pth\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "e1b1d995",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:34:01.004199Z",
+     "iopub.status.busy": "2023-08-24T02:34:01.003999Z",
+     "iopub.status.idle": "2023-08-24T02:34:06.363922Z",
+     "shell.execute_reply": "2023-08-24T02:34:06.363054Z"
+    },
+    "papermill": {
+     "duration": 5.368694,
+     "end_time": "2023-08-24T02:34:06.365757",
+     "exception": false,
+     "start_time": "2023-08-24T02:34:00.997063",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
+      "Traceback (most recent call last):\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/../memory_script/eval_v5_memory_guided.py\", line 366, in <module>\r\n",
+      "    asyncio.run(main_function())\r\n",
+      "  File \"/usr/lib/python3.11/asyncio/runners.py\", line 190, in run\r\n",
+      "    return runner.run(main)\r\n",
+      "           ^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/lib/python3.11/asyncio/runners.py\", line 118, in run\r\n",
+      "    return self._loop.run_until_complete(task)\r\n",
+      "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/lib/python3.11/asyncio/base_events.py\", line 653, in run_until_complete\r\n",
+      "    return future.result()\r\n",
+      "           ^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/../memory_script/eval_v5_memory_guided.py\", line 58, in main_function\r\n",
+      "    model = SimpleRWKV(model_path, device=\"cuda\")\r\n",
+      "            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 1378, in __init__\r\n",
+      "    self.model = RWKV(**model_config)\r\n",
+      "                 ^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 553, in __init__\r\n",
+      "    raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n",
+      "ValueError: load_model file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/model/v5-hs2x-L6-D4096-E0_1-mem-instruct.pth' does not exist\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Lets do a quick memory test\n",
+    "!export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
+    "        python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-instruct.pth\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b7049bf1",
+   "metadata": {
+    "papermill": {
+     "duration": 0.006184,
+     "end_time": "2023-08-24T02:34:06.378865",
+     "exception": false,
+     "start_time": "2023-08-24T02:34:06.372681",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "## Tune 2 : Low ctx size (512), memory training\n",
+    "\n",
+    "- Tune 2: Low ctx size (512), Training with instruction & input masked. This forces the actual memory training on the output tokens."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "f11fd3cd",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:34:06.392688Z",
+     "iopub.status.busy": "2023-08-24T02:34:06.392491Z",
+     "iopub.status.idle": "2023-08-24T02:34:06.801858Z",
+     "shell.execute_reply": "2023-08-24T02:34:06.801366Z"
+    },
+    "papermill": {
+     "duration": 0.418673,
+     "end_time": "2023-08-24T02:34:06.803753",
+     "exception": false,
+     "start_time": "2023-08-24T02:34:06.385080",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "## Generating word reptition dataset ##\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5 max words, 5000 samples - at ../dataset/gen-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3563 samples (20 token repeat) - 15 max words - at ../dataset/shuffle-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 675 samples (50 token repeat) - 200 max words - at ../dataset/shuffle-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3192 samples (30 token repeat) - 25 max words - at ../dataset/shuffle-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 1332 samples (50 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 1775 samples (50 token repeat) - 75 max words - at ../dataset/shuffle-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 15 max words, 5000 samples - at ../dataset/gen-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 20 max words, 5000 samples - at ../dataset/gen-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2 max words, 5000 samples - at ../dataset/word-2-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 25 max words, 5000 samples - at ../dataset/gen-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5257 samples (20 token repeat) - 10 max words - at ../dataset/shuffle-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 2639 samples (50 token repeat) - 50 max words - at ../dataset/shuffle-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 10 max words, 5000 samples - at ../dataset/gen-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 35 max words, 5000 samples - at ../dataset/gen-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 45 max words, 5000 samples - at ../dataset/gen-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 50 max words, 5000 samples - at ../dataset/gen-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 30 max words, 5000 samples - at ../dataset/gen-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 55 max words, 5000 samples - at ../dataset/gen-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 65 max words, 5000 samples - at ../dataset/gen-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 40 max words, 5000 samples - at ../dataset/gen-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 75 max words, 5000 samples - at ../dataset/gen-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 80 max words, 5000 samples - at ../dataset/gen-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 85 max words, 5000 samples - at ../dataset/gen-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 90 max words, 5000 samples - at ../dataset/gen-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 60 max words, 5000 samples - at ../dataset/gen-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 95 max words, 5000 samples - at ../dataset/gen-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 100 max words, 5000 samples - at ../dataset/word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 70 max words, 5000 samples - at ../dataset/gen-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 200 max words, 5000 samples - at ../dataset/word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "## Done ##\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "total 79M\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "drwxr-xr-x  2 root root 4.0K Aug 24 02:34 .\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "drwxr-xr-x 11 root root  230 Aug 24 02:33 ..\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 987K Aug 24 02:34 gen-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 1.2M Aug 24 02:34 gen-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 1.5M Aug 24 02:34 gen-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 1.7M Aug 24 02:34 gen-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 2.0M Aug 24 02:34 gen-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 2.2M Aug 24 02:34 gen-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 2.4M Aug 24 02:34 gen-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 2.6M Aug 24 02:34 gen-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 728K Aug 24 02:34 gen-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 2.9M Aug 24 02:34 gen-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 3.1M Aug 24 02:34 gen-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 3.4M Aug 24 02:34 gen-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 3.6M Aug 24 02:34 gen-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 3.8M Aug 24 02:34 gen-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 4.1M Aug 24 02:34 gen-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 4.3M Aug 24 02:34 gen-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 4.5M Aug 24 02:34 gen-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 4.8M Aug 24 02:34 gen-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 5.0M Aug 24 02:34 gen-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 1.1M Aug 24 02:34 shuffle-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 1.4M Aug 24 02:34 shuffle-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 868K Aug 24 02:34 shuffle-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 1.4M Aug 24 02:34 shuffle-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 1.1M Aug 24 02:34 shuffle-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 1.5M Aug 24 02:34 shuffle-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 1.5M Aug 24 02:34 shuffle-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 5.2M Aug 24 02:34 word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 596K Aug 24 02:34 word-2-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  10M Aug 24 02:34 word-200-count.jsonl\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%script bash\n",
+    "\n",
+    "########################################\n",
+    "# Generate the required jsonl dataset\n",
+    "########################################\n",
+    "\n",
+    "# Reset the dataset dir\n",
+    "mkdir -p ../dataset\n",
+    "rm -rf ../dataset/*.jsonl\n",
+    "\n",
+    "# Generate the various datasets\n",
+    "echo \"## Generating word reptition dataset ##\"\n",
+    "\n",
+    "#\n",
+    "# We switch over to fully masked instruct+input, to properly learn the memorization task\n",
+    "#\n",
+    "python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-2-count.jsonl  2  5000 &\n",
+    "for i in {5..95..5} \n",
+    "do\n",
+    "    python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 5000 & \n",
+    "done\n",
+    "python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-100-count.jsonl 100 5000 &\n",
+    "python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-200-count.jsonl 200 5000 &\n",
+    "\n",
+    "#\n",
+    "# We mix in the shuffled word list, to ensure all words / tokens are learned;\n",
+    "# however, this might introduce an exclusion bias (if seen this word, never repeat it),\n",
+    "# so we limit the proportion of these data samples\n",
+    "#\n",
+    "python ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-10-count.jsonl 10 20 &\n",
+    "python ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-15-count.jsonl 15 20 &\n",
+    "python ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-25-count.jsonl 25 30 &\n",
+    "python ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-50-count.jsonl 50 50 &\n",
+    "python ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-75-count.jsonl 75 50 &\n",
+    "python ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-100-count.jsonl 100 50 &\n",
+    "python ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-200-count.jsonl 200 50 &\n",
+    "\n",
+    "wait\n",
+    "echo \"## Done ##\"\n",
+    "\n",
+    "ls -alh ../dataset/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "ac2cdf70",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:34:06.825314Z",
+     "iopub.status.busy": "2023-08-24T02:34:06.825053Z",
+     "iopub.status.idle": "2023-08-24T02:34:21.148462Z",
+     "shell.execute_reply": "2023-08-24T02:34:21.147486Z"
+    },
+    "papermill": {
+     "duration": 14.336064,
+     "end_time": "2023-08-24T02:34:21.150572",
+     "exception": false,
+     "start_time": "2023-08-24T02:34:06.814508",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.11/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/v5base-mem-template.yaml', '--trainer.logger.init_args.name=v5-hs2x-L6-D4096-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_2_offload)', '--trainer.strategy=deepspeed_stage_2_offload', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-hs2x-L6-D4096-E0_1-mem-ctx-512/', '--model.lr_init=5e-4', '--model.lr_final=4e-4', '--data.max_token_size=512', '--model.ctx_len=512', '--model.bptt_learning_range=1', '--model.load_model=../model/v5-hs2x-L6-D4096-E0_1-mem-instruct.pth'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/v5base-mem-template.yaml', '--trainer.logger.init_args.name=v5-hs2x-L6-D4096-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_2_offload)', '--trainer.strategy=deepspeed_stage_2_offload', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-hs2x-L6-D4096-E0_1-mem-ctx-512/', '--model.lr_init=5e-4', '--model.lr_final=4e-4', '--data.max_token_size=512', '--model.ctx_len=512', '--model.bptt_learning_range=1', '--model.load_model=../model/v5-hs2x-L6-D4096-E0_1-mem-instruct.pth'].\r\n",
+      "  rank_zero_warn(\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.11/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 71315400\r\n",
+      "  rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n",
+      "Global seed set to 71315400\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: wandb version 0.15.8 is available!  To upgrade, please run:\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m:  $ pip install wandb --upgrade\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.4\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20230824_023412-42b9rj47\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mv5-hs2x-L6-D4096-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_2_offload)\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/42b9rj47\u001b[0m\r\n",
+      "Traceback (most recent call last):\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/lightning_trainer.py\", line 254, in <module>\r\n",
+      "    cli_main()\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/lightning_trainer.py\", line 233, in cli_main\r\n",
+      "    LightningCLI(\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/lightning/pytorch/cli.py\", line 350, in __init__\r\n",
+      "    self.instantiate_classes()\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/lightning/pytorch/cli.py\", line 499, in instantiate_classes\r\n",
+      "    self.config_init = self.parser.instantiate_classes(self.config)\r\n",
+      "                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n",
+      "    cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n",
+      "          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_core.py\", line 1128, in instantiate_classes\r\n",
+      "    cfg[subcommand] = subparser.instantiate_classes(cfg[subcommand], instantiate_groups=instantiate_groups)\r\n",
+      "                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n",
+      "    cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n",
+      "          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_core.py\", line 1122, in instantiate_classes\r\n",
+      "    component.instantiate_class(component, cfg)\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_signatures.py\", line 551, in group_instantiate_class\r\n",
+      "    parent[key] = group.group_class(**value)\r\n",
+      "                  ^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/src/model.py\", line 553, in __init__\r\n",
+      "    raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n",
+      "ValueError: load_model file '../model/v5-hs2x-L6-D4096-E0_1-mem-instruct.pth' does not exist\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5-hs2x-L6-D4096-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_2_offload)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/42b9rj47\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230824_023412-42b9rj47/logs\u001b[0m\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Start the finetune model training\n",
+    "!cd \"{TRAINER_DIR}\" && \\\n",
+    "    export WANDB_MODE=\"{WANDB_MODE}\" && \\\n",
+    "    export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
+    "    python lightning_trainer.py fit \\\n",
+    "        -c \"{NOTEBOOK_DIR}/v5base-mem-template.yaml\" \\\n",
+    "        --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Tune ctx-512 (train-ctx=512, {DEEPSPEED_STRAT})\" \\\n",
+    "        --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n",
+    "        --trainer.devices=\"{GPU_DEVICES}\"  \\\n",
+    "        --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-mem-ctx-512/\" \\\n",
+    "        --model.lr_init=5e-4 \\\n",
+    "        --model.lr_final=4e-4 \\\n",
+    "        --data.max_token_size=512 \\\n",
+    "        --model.ctx_len=512 \\\n",
+    "        --model.bptt_learning_range=1 \\\n",
+    "        --model.load_model=\"../model/{FILENAME_PREFIX}-mem-instruct.pth\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "c850b641",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:34:21.176182Z",
+     "iopub.status.busy": "2023-08-24T02:34:21.175898Z",
+     "iopub.status.idle": "2023-08-24T02:34:23.970934Z",
+     "shell.execute_reply": "2023-08-24T02:34:23.970062Z"
+    },
+    "papermill": {
+     "duration": 2.80889,
+     "end_time": "2023-08-24T02:34:23.972376",
+     "exception": false,
+     "start_time": "2023-08-24T02:34:21.163486",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Traceback (most recent call last):\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/export_checkpoint.py\", line 651, in <module>\r\n",
+      "    convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n",
+      "    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n",
+      "                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n",
+      "    raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n",
+      "ValueError: Unable to find 'latest' file at ../checkpoint/v5-hs2x-L6-D4096-E0_1-mem-ctx-512/last.ckpt/latest\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ls: cannot access '../model/v5-hs2x-L6-D4096-E0_1-mem-ctx-512.pth': No such file or directory\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Let's export the model from the checkpoint\n",
+    "!cd \"{TRAINER_DIR}\" && \\\n",
+    "    python export_checkpoint.py \\\n",
+    "        \"../checkpoint/{FILENAME_PREFIX}-mem-ctx-512/last.ckpt\" \\\n",
+    "        \"../model/{FILENAME_PREFIX}-mem-ctx-512.pth\" \"bf16\"\n",
+    "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-mem-ctx-512.pth\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "e0dad096",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:34:23.994451Z",
+     "iopub.status.busy": "2023-08-24T02:34:23.994263Z",
+     "iopub.status.idle": "2023-08-24T02:34:29.162165Z",
+     "shell.execute_reply": "2023-08-24T02:34:29.161478Z"
+    },
+    "papermill": {
+     "duration": 5.180701,
+     "end_time": "2023-08-24T02:34:29.164141",
+     "exception": false,
+     "start_time": "2023-08-24T02:34:23.983440",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
+      "Traceback (most recent call last):\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/../memory_script/eval_v5_memory_guided.py\", line 366, in <module>\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "    asyncio.run(main_function())\r\n",
+      "  File \"/usr/lib/python3.11/asyncio/runners.py\", line 190, in run\r\n",
+      "    return runner.run(main)\r\n",
+      "           ^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/lib/python3.11/asyncio/runners.py\", line 118, in run\r\n",
+      "    return self._loop.run_until_complete(task)\r\n",
+      "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/lib/python3.11/asyncio/base_events.py\", line 653, in run_until_complete\r\n",
+      "    return future.result()\r\n",
+      "           ^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/../memory_script/eval_v5_memory_guided.py\", line 58, in main_function\r\n",
+      "    model = SimpleRWKV(model_path, device=\"cuda\")\r\n",
+      "            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 1378, in __init__\r\n",
+      "    self.model = RWKV(**model_config)\r\n",
+      "                 ^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 553, in __init__\r\n",
+      "    raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n",
+      "ValueError: load_model file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/model/v5-hs2x-L6-D4096-E0_1-mem-ctx-512.pth' does not exist\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Let's do a quick memory test\n",
+    "!export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
+    "        python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-512.pth\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f2ef7df8",
+   "metadata": {
+    "papermill": {
+     "duration": 0.085457,
+     "end_time": "2023-08-24T02:34:29.261263",
+     "exception": false,
+     "start_time": "2023-08-24T02:34:29.175806",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "## Tune 3: Low ctx size (1024), memory training\n",
+    "\n",
+    "- Tune 3: Low ctx size (1024), scaling up!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "5ebddd6a",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:34:29.283632Z",
+     "iopub.status.busy": "2023-08-24T02:34:29.283419Z",
+     "iopub.status.idle": "2023-08-24T02:34:29.857534Z",
+     "shell.execute_reply": "2023-08-24T02:34:29.856296Z"
+    },
+    "papermill": {
+     "duration": 0.605793,
+     "end_time": "2023-08-24T02:34:29.877588",
+     "exception": false,
+     "start_time": "2023-08-24T02:34:29.271795",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "## Generating word reptition dataset ##\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 10 max words, 400 samples - at ../dataset/gen-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 30 max words, 400 samples - at ../dataset/gen-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5 max words, 400 samples - at ../dataset/gen-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 15 max words, 400 samples - at ../dataset/gen-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 40 max words, 400 samples - at ../dataset/gen-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 20 max words, 400 samples - at ../dataset/gen-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 25 max words, 400 samples - at ../dataset/gen-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 1293 samples (10 token repeat) - 20 max words - at ../dataset/shuffle-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 653 samples (10 token repeat) - 40 max words - at ../dataset/shuffle-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 584 samples (10 token repeat) - 45 max words - at ../dataset/shuffle-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 750 samples (10 token repeat) - 35 max words - at ../dataset/shuffle-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 35 max words, 400 samples - at ../dataset/gen-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 880 samples (10 token repeat) - 30 max words - at ../dataset/shuffle-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 879 samples (20 token repeat) - 60 max words - at ../dataset/shuffle-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 55 max words, 800 samples - at ../dataset/gen-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2 max words, 400 samples - at ../dataset/word-2-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 70 max words, 800 samples - at ../dataset/gen-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 808 samples (20 token repeat) - 65 max words - at ../dataset/shuffle-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 2609 samples (10 token repeat) - 10 max words - at ../dataset/shuffle-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 633 samples (20 token repeat) - 85 max words - at ../dataset/shuffle-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 751 samples (20 token repeat) - 70 max words - at ../dataset/shuffle-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 959 samples (20 token repeat) - 55 max words - at ../dataset/shuffle-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 1054 samples (10 token repeat) - 25 max words - at ../dataset/shuffle-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 45 max words, 400 samples - at ../dataset/gen-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 567 samples (20 token repeat) - 95 max words - at ../dataset/shuffle-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 410 samples (20 token repeat) - 105 max words - at ../dataset/shuffle-word-105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 90 max words, 800 samples - at ../dataset/gen-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 65 max words, 800 samples - at ../dataset/gen-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 535 samples (20 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 50 max words, 800 samples - at ../dataset/gen-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 60 max words, 800 samples - at ../dataset/gen-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5572 samples (10 token repeat) - 5 max words - at ../dataset/shuffle-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 1761 samples (10 token repeat) - 15 max words - at ../dataset/shuffle-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 334 samples (20 token repeat) - 135 max words - at ../dataset/shuffle-word-135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 1058 samples (20 token repeat) - 50 max words - at ../dataset/shuffle-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 294 samples (20 token repeat) - 160 max words - at ../dataset/shuffle-word-160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 141 samples (20 token repeat) - 385 max words - at ../dataset/shuffle-word-385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 143 samples (20 token repeat) - 340 max words - at ../dataset/shuffle-word-340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 289 samples (20 token repeat) - 165 max words - at ../dataset/shuffle-word-165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 278 samples (20 token repeat) - 180 max words - at ../dataset/shuffle-word-180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 316 samples (20 token repeat) - 145 max words - at ../dataset/shuffle-word-145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 181 samples (20 token repeat) - 285 max words - at ../dataset/shuffle-word-285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 324 samples (20 token repeat) - 140 max words - at ../dataset/shuffle-word-140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 200 samples (20 token repeat) - 225 max words - at ../dataset/shuffle-word-225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 105 max words, 800 samples - at ../dataset/gen-word-105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 309 samples (20 token repeat) - 150 max words - at ../dataset/shuffle-word-150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 283 samples (20 token repeat) - 170 max words - at ../dataset/shuffle-word-170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 584 samples (20 token repeat) - 90 max words - at ../dataset/shuffle-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 199 samples (20 token repeat) - 230 max words - at ../dataset/shuffle-word-230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 155 max words, 800 samples - at ../dataset/gen-word-155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 187 samples (20 token repeat) - 250 max words - at ../dataset/shuffle-word-250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 346 samples (20 token repeat) - 130 max words - at ../dataset/shuffle-word-130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 170 max words, 800 samples - at ../dataset/gen-word-170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 140 samples (20 token repeat) - 390 max words - at ../dataset/shuffle-word-390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 383 samples (20 token repeat) - 115 max words - at ../dataset/shuffle-word-115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 658 samples (20 token repeat) - 80 max words - at ../dataset/shuffle-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 118 samples (20 token repeat) - 420 max words - at ../dataset/shuffle-word-420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 75 max words, 800 samples - at ../dataset/gen-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 80 max words, 800 samples - at ../dataset/gen-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 278 samples (20 token repeat) - 175 max words - at ../dataset/shuffle-word-175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 115 max words, 800 samples - at ../dataset/gen-word-115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 182 samples (20 token repeat) - 265 max words - at ../dataset/shuffle-word-265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 269 samples (20 token repeat) - 190 max words - at ../dataset/shuffle-word-190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 298 samples (20 token repeat) - 155 max words - at ../dataset/shuffle-word-155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 95 max words, 800 samples - at ../dataset/gen-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 192 samples (20 token repeat) - 240 max words - at ../dataset/shuffle-word-240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 100 max words, 800 samples - at ../dataset/gen-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 110 max words, 800 samples - at ../dataset/gen-word-110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 180 max words, 800 samples - at ../dataset/gen-word-180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 116 samples (20 token repeat) - 465 max words - at ../dataset/shuffle-word-465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 117 samples (20 token repeat) - 480 max words - at ../dataset/shuffle-word-480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 272 samples (20 token repeat) - 195 max words - at ../dataset/shuffle-word-195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 505 max words - at ../dataset/shuffle-word-505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 184 samples (20 token repeat) - 270 max words - at ../dataset/shuffle-word-270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 175 max words, 800 samples - at ../dataset/gen-word-175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 116 samples (20 token repeat) - 470 max words - at ../dataset/shuffle-word-470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 183 samples (20 token repeat) - 290 max words - at ../dataset/shuffle-word-290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 120 max words, 800 samples - at ../dataset/gen-word-120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 125 max words, 800 samples - at ../dataset/gen-word-125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 145 max words, 800 samples - at ../dataset/gen-word-145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 185 samples (20 token repeat) - 260 max words - at ../dataset/shuffle-word-260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 140 samples (20 token repeat) - 360 max words - at ../dataset/shuffle-word-360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 145 samples (20 token repeat) - 305 max words - at ../dataset/shuffle-word-305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 535 max words - at ../dataset/shuffle-word-535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 190 max words, 800 samples - at ../dataset/gen-word-190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 142 samples (20 token repeat) - 395 max words - at ../dataset/shuffle-word-395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 99 samples (20 token repeat) - 550 max words - at ../dataset/shuffle-word-550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 139 samples (20 token repeat) - 370 max words - at ../dataset/shuffle-word-370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 118 samples (20 token repeat) - 495 max words - at ../dataset/shuffle-word-495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 545 max words - at ../dataset/shuffle-word-545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 140 samples (20 token repeat) - 350 max words - at ../dataset/shuffle-word-350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 120 samples (20 token repeat) - 405 max words - at ../dataset/shuffle-word-405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 119 samples (20 token repeat) - 415 max words - at ../dataset/shuffle-word-415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 515 max words - at ../dataset/shuffle-word-515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 135 max words, 800 samples - at ../dataset/gen-word-135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 120 samples (20 token repeat) - 430 max words - at ../dataset/shuffle-word-430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 530 max words - at ../dataset/shuffle-word-530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 525 max words - at ../dataset/shuffle-word-525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 184 samples (20 token repeat) - 280 max words - at ../dataset/shuffle-word-280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 141 samples (20 token repeat) - 330 max words - at ../dataset/shuffle-word-330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 203 samples (20 token repeat) - 215 max words - at ../dataset/shuffle-word-215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 119 samples (20 token repeat) - 500 max words - at ../dataset/shuffle-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 245 max words, 800 samples - at ../dataset/gen-word-245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 356 samples (20 token repeat) - 125 max words - at ../dataset/shuffle-word-125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 116 samples (20 token repeat) - 455 max words - at ../dataset/shuffle-word-455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 191 samples (20 token repeat) - 245 max words - at ../dataset/shuffle-word-245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 120 samples (20 token repeat) - 425 max words - at ../dataset/shuffle-word-425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 140 max words, 800 samples - at ../dataset/gen-word-140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 265 samples (20 token repeat) - 185 max words - at ../dataset/shuffle-word-185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 209 samples (20 token repeat) - 210 max words - at ../dataset/shuffle-word-210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 187 samples (20 token repeat) - 255 max words - at ../dataset/shuffle-word-255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 139 samples (20 token repeat) - 375 max words - at ../dataset/shuffle-word-375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 120 samples (20 token repeat) - 410 max words - at ../dataset/shuffle-word-410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 398 samples (20 token repeat) - 110 max words - at ../dataset/shuffle-word-110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 142 samples (20 token repeat) - 335 max words - at ../dataset/shuffle-word-335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 140 samples (20 token repeat) - 400 max words - at ../dataset/shuffle-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 117 samples (20 token repeat) - 475 max words - at ../dataset/shuffle-word-475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 143 samples (20 token repeat) - 310 max words - at ../dataset/shuffle-word-310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 116 samples (20 token repeat) - 490 max words - at ../dataset/shuffle-word-490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 335 max words, 800 samples - at ../dataset/gen-word-335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 140 samples (20 token repeat) - 345 max words - at ../dataset/shuffle-word-345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 182 samples (20 token repeat) - 295 max words - at ../dataset/shuffle-word-295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 130 max words, 800 samples - at ../dataset/gen-word-130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 99 samples (20 token repeat) - 510 max words - at ../dataset/shuffle-word-510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 370 samples (20 token repeat) - 120 max words - at ../dataset/shuffle-word-120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 142 samples (20 token repeat) - 315 max words - at ../dataset/shuffle-word-315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 116 samples (20 token repeat) - 450 max words - at ../dataset/shuffle-word-450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 195 samples (20 token repeat) - 235 max words - at ../dataset/shuffle-word-235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 118 samples (20 token repeat) - 440 max words - at ../dataset/shuffle-word-440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 118 samples (20 token repeat) - 445 max words - at ../dataset/shuffle-word-445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 118 samples (20 token repeat) - 460 max words - at ../dataset/shuffle-word-460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 143 samples (20 token repeat) - 320 max words - at ../dataset/shuffle-word-320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 117 samples (20 token repeat) - 435 max words - at ../dataset/shuffle-word-435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 115 samples (20 token repeat) - 485 max words - at ../dataset/shuffle-word-485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 182 samples (20 token repeat) - 275 max words - at ../dataset/shuffle-word-275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 165 max words, 800 samples - at ../dataset/gen-word-165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 140 samples (20 token repeat) - 355 max words - at ../dataset/shuffle-word-355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 265 samples (20 token repeat) - 200 max words - at ../dataset/shuffle-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 219 samples (20 token repeat) - 205 max words - at ../dataset/shuffle-word-205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 181 samples (20 token repeat) - 300 max words - at ../dataset/shuffle-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 140 samples (20 token repeat) - 380 max words - at ../dataset/shuffle-word-380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 85 max words, 800 samples - at ../dataset/gen-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 141 samples (20 token repeat) - 325 max words - at ../dataset/shuffle-word-325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 540 max words - at ../dataset/shuffle-word-540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 225 max words, 800 samples - at ../dataset/gen-word-225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 230 max words, 800 samples - at ../dataset/gen-word-230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 520 max words - at ../dataset/shuffle-word-520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 220 max words, 800 samples - at ../dataset/gen-word-220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 260 max words, 800 samples - at ../dataset/gen-word-260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 700 samples (20 token repeat) - 75 max words - at ../dataset/shuffle-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 475 max words, 800 samples - at ../dataset/gen-word-475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 285 max words, 800 samples - at ../dataset/gen-word-285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 195 max words, 800 samples - at ../dataset/gen-word-195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 140 samples (20 token repeat) - 365 max words - at ../dataset/shuffle-word-365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 200 samples (20 token repeat) - 220 max words - at ../dataset/shuffle-word-220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 160 max words, 800 samples - at ../dataset/gen-word-160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 325 max words, 800 samples - at ../dataset/gen-word-325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 235 max words, 800 samples - at ../dataset/gen-word-235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 350 max words, 800 samples - at ../dataset/gen-word-350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 215 max words, 800 samples - at ../dataset/gen-word-215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 150 max words, 800 samples - at ../dataset/gen-word-150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 290 max words, 800 samples - at ../dataset/gen-word-290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 265 max words, 800 samples - at ../dataset/gen-word-265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 370 max words, 800 samples - at ../dataset/gen-word-370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 200 max words, 800 samples - at ../dataset/gen-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 450 max words, 800 samples - at ../dataset/gen-word-450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 205 max words, 800 samples - at ../dataset/gen-word-205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 315 max words, 800 samples - at ../dataset/gen-word-315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 375 max words, 800 samples - at ../dataset/gen-word-375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 310 max words, 800 samples - at ../dataset/gen-word-310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 385 max words, 800 samples - at ../dataset/gen-word-385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 255 max words, 800 samples - at ../dataset/gen-word-255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 270 max words, 800 samples - at ../dataset/gen-word-270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 495 max words, 800 samples - at ../dataset/gen-word-495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 185 max words, 800 samples - at ../dataset/gen-word-185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 250 max words, 800 samples - at ../dataset/gen-word-250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 360 max words, 800 samples - at ../dataset/gen-word-360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 355 max words, 800 samples - at ../dataset/gen-word-355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 210 max words, 800 samples - at ../dataset/gen-word-210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 330 max words, 800 samples - at ../dataset/gen-word-330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 405 max words, 800 samples - at ../dataset/gen-word-405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 340 max words, 800 samples - at ../dataset/gen-word-340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 240 max words, 800 samples - at ../dataset/gen-word-240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 345 max words, 800 samples - at ../dataset/gen-word-345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 440 max words, 800 samples - at ../dataset/gen-word-440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 390 max words, 800 samples - at ../dataset/gen-word-390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 300 max words, 800 samples - at ../dataset/gen-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 380 max words, 800 samples - at ../dataset/gen-word-380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 365 max words, 800 samples - at ../dataset/gen-word-365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 490 max words, 800 samples - at ../dataset/gen-word-490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 465 max words, 800 samples - at ../dataset/gen-word-465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 395 max words, 800 samples - at ../dataset/gen-word-395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 550 max words, 800 samples - at ../dataset/gen-word-550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 425 max words, 800 samples - at ../dataset/gen-word-425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 305 max words, 800 samples - at ../dataset/gen-word-305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 280 max words, 800 samples - at ../dataset/gen-word-280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 420 max words, 800 samples - at ../dataset/gen-word-420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 520 max words, 800 samples - at ../dataset/gen-word-520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 400 max words, 800 samples - at ../dataset/gen-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 500 max words, 800 samples - at ../dataset/gen-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 410 max words, 800 samples - at ../dataset/gen-word-410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 295 max words, 800 samples - at ../dataset/gen-word-295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 275 max words, 800 samples - at ../dataset/gen-word-275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 320 max words, 800 samples - at ../dataset/gen-word-320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 460 max words, 800 samples - at ../dataset/gen-word-460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 430 max words, 800 samples - at ../dataset/gen-word-430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 445 max words, 800 samples - at ../dataset/gen-word-445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 470 max words, 800 samples - at ../dataset/gen-word-470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 515 max words, 800 samples - at ../dataset/gen-word-515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 485 max words, 800 samples - at ../dataset/gen-word-485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 415 max words, 800 samples - at ../dataset/gen-word-415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 505 max words, 800 samples - at ../dataset/gen-word-505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 530 max words, 800 samples - at ../dataset/gen-word-530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 435 max words, 800 samples - at ../dataset/gen-word-435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 480 max words, 800 samples - at ../dataset/gen-word-480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 455 max words, 800 samples - at ../dataset/gen-word-455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 535 max words, 800 samples - at ../dataset/gen-word-535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 510 max words, 800 samples - at ../dataset/gen-word-510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 545 max words, 800 samples - at ../dataset/gen-word-545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 525 max words, 800 samples - at ../dataset/gen-word-525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 540 max words, 800 samples - at ../dataset/gen-word-540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "## Done ##\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "total 297M\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "drwxr-xr-x  2 root root   12K Aug 24 02:34 .\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "drwxr-xr-x 11 root root   230 Aug 24 02:33 ..\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   80K Aug 24 02:34 gen-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  839K Aug 24 02:34 gen-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  898K Aug 24 02:34 gen-word-105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  935K Aug 24 02:34 gen-word-110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  973K Aug 24 02:34 gen-word-115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 1009K Aug 24 02:34 gen-word-120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.1M Aug 24 02:34 gen-word-125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.1M Aug 24 02:34 gen-word-130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.1M Aug 24 02:34 gen-word-135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.2M Aug 24 02:34 gen-word-140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.2M Aug 24 02:34 gen-word-145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   97K Aug 24 02:34 gen-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.3M Aug 24 02:34 gen-word-150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.3M Aug 24 02:34 gen-word-155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.4M Aug 24 02:34 gen-word-160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.4M Aug 24 02:34 gen-word-165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.4M Aug 24 02:34 gen-word-170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.5M Aug 24 02:34 gen-word-175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.5M Aug 24 02:34 gen-word-180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.5M Aug 24 02:34 gen-word-185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.6M Aug 24 02:34 gen-word-190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.6M Aug 24 02:34 gen-word-195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  117K Aug 24 02:34 gen-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.6M Aug 24 02:34 gen-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.7M Aug 24 02:34 gen-word-205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.7M Aug 24 02:34 gen-word-210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.8M Aug 24 02:34 gen-word-215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.8M Aug 24 02:34 gen-word-220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.8M Aug 24 02:34 gen-word-225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.9M Aug 24 02:34 gen-word-230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.9M Aug 24 02:34 gen-word-235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.0M Aug 24 02:34 gen-word-240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.0M Aug 24 02:34 gen-word-245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  136K Aug 24 02:34 gen-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.0M Aug 24 02:34 gen-word-250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.0M Aug 24 02:34 gen-word-255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.1M Aug 24 02:34 gen-word-260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.1M Aug 24 02:34 gen-word-265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.2M Aug 24 02:34 gen-word-270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.2M Aug 24 02:34 gen-word-275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.2M Aug 24 02:34 gen-word-280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.3M Aug 24 02:34 gen-word-285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.3M Aug 24 02:34 gen-word-290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.4M Aug 24 02:34 gen-word-295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  157K Aug 24 02:34 gen-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.4M Aug 24 02:34 gen-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.4M Aug 24 02:34 gen-word-305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.5M Aug 24 02:34 gen-word-310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.5M Aug 24 02:34 gen-word-315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.6M Aug 24 02:34 gen-word-320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.6M Aug 24 02:34 gen-word-325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.6M Aug 24 02:34 gen-word-330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.7M Aug 24 02:34 gen-word-335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.7M Aug 24 02:34 gen-word-340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.7M Aug 24 02:34 gen-word-345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  178K Aug 24 02:34 gen-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.8M Aug 24 02:34 gen-word-350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.8M Aug 24 02:34 gen-word-355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.9M Aug 24 02:34 gen-word-360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.9M Aug 24 02:34 gen-word-365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.9M Aug 24 02:34 gen-word-370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.0M Aug 24 02:34 gen-word-375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.0M Aug 24 02:34 gen-word-380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.0M Aug 24 02:34 gen-word-385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.1M Aug 24 02:34 gen-word-390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.1M Aug 24 02:34 gen-word-395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  194K Aug 24 02:34 gen-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.1M Aug 24 02:34 gen-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.2M Aug 24 02:34 gen-word-405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.2M Aug 24 02:34 gen-word-410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.3M Aug 24 02:34 gen-word-415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.3M Aug 24 02:34 gen-word-420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.3M Aug 24 02:34 gen-word-425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.4M Aug 24 02:34 gen-word-430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.4M Aug 24 02:34 gen-word-435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.5M Aug 24 02:34 gen-word-440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.5M Aug 24 02:34 gen-word-445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  211K Aug 24 02:34 gen-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.5M Aug 24 02:34 gen-word-450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.6M Aug 24 02:34 gen-word-455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.6M Aug 24 02:34 gen-word-460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.7M Aug 24 02:34 gen-word-465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.7M Aug 24 02:34 gen-word-470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.7M Aug 24 02:34 gen-word-475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:34 gen-word-480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:34 gen-word-485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:34 gen-word-490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.9M Aug 24 02:34 gen-word-495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   59K Aug 24 02:34 gen-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  469K Aug 24 02:34 gen-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.9M Aug 24 02:34 gen-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.0M Aug 24 02:34 gen-word-505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.0M Aug 24 02:34 gen-word-510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.0M Aug 24 02:34 gen-word-515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.1M Aug 24 02:34 gen-word-520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.1M Aug 24 02:34 gen-word-525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.1M Aug 24 02:34 gen-word-530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.2M Aug 24 02:34 gen-word-535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.2M Aug 24 02:34 gen-word-540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.3M Aug 24 02:34 gen-word-545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  505K Aug 24 02:34 gen-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.3M Aug 24 02:34 gen-word-550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  544K Aug 24 02:34 gen-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  579K Aug 24 02:34 gen-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  623K Aug 24 02:34 gen-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  665K Aug 24 02:34 gen-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  701K Aug 24 02:34 gen-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  742K Aug 24 02:34 gen-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  774K Aug 24 02:34 gen-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  804K Aug 24 02:34 gen-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  500K Aug 24 02:34 shuffle-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  567K Aug 24 02:34 shuffle-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  559K Aug 24 02:34 shuffle-word-105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  556K Aug 24 02:34 shuffle-word-110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  551K Aug 24 02:34 shuffle-word-115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  554K Aug 24 02:34 shuffle-word-120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  549K Aug 24 02:34 shuffle-word-125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  548K Aug 24 02:34 shuffle-word-130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  548K Aug 24 02:34 shuffle-word-135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  543K Aug 24 02:34 shuffle-word-140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  538K Aug 24 02:34 shuffle-word-145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  421K Aug 24 02:34 shuffle-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  545K Aug 24 02:34 shuffle-word-150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  546K Aug 24 02:34 shuffle-word-155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  545K Aug 24 02:34 shuffle-word-160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  540K Aug 24 02:34 shuffle-word-165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  543K Aug 24 02:34 shuffle-word-170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  540K Aug 24 02:34 shuffle-word-175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  541K Aug 24 02:34 shuffle-word-180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  539K Aug 24 02:34 shuffle-word-185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  539K Aug 24 02:34 shuffle-word-190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  542K Aug 24 02:34 shuffle-word-195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  389K Aug 24 02:34 shuffle-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  539K Aug 24 02:34 shuffle-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  540K Aug 24 02:34 shuffle-word-205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  537K Aug 24 02:34 shuffle-word-210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  533K Aug 24 02:34 shuffle-word-215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  532K Aug 24 02:34 shuffle-word-220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  538K Aug 24 02:34 shuffle-word-225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  533K Aug 24 02:34 shuffle-word-230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  534K Aug 24 02:34 shuffle-word-235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  536K Aug 24 02:34 shuffle-word-240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  536K Aug 24 02:34 shuffle-word-245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  363K Aug 24 02:34 shuffle-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  533K Aug 24 02:34 shuffle-word-250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  537K Aug 24 02:34 shuffle-word-255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  536K Aug 24 02:34 shuffle-word-260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  532K Aug 24 02:34 shuffle-word-265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  531K Aug 24 02:34 shuffle-word-270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  533K Aug 24 02:34 shuffle-word-275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  534K Aug 24 02:34 shuffle-word-280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  529K Aug 24 02:34 shuffle-word-285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  536K Aug 24 02:34 shuffle-word-290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  530K Aug 24 02:34 shuffle-word-295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  341K Aug 24 02:34 shuffle-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  532K Aug 24 02:34 shuffle-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  529K Aug 24 02:34 shuffle-word-305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  531K Aug 24 02:34 shuffle-word-310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  531K Aug 24 02:34 shuffle-word-315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  533K Aug 24 02:34 shuffle-word-320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  531K Aug 24 02:34 shuffle-word-325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  530K Aug 24 02:34 shuffle-word-330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  535K Aug 24 02:34 shuffle-word-335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  531K Aug 24 02:34 shuffle-word-340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  531K Aug 24 02:34 shuffle-word-345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  330K Aug 24 02:34 shuffle-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  530K Aug 24 02:34 shuffle-word-350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  529K Aug 24 02:34 shuffle-word-355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  530K Aug 24 02:34 shuffle-word-360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  530K Aug 24 02:34 shuffle-word-365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  530K Aug 24 02:34 shuffle-word-370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  531K Aug 24 02:34 shuffle-word-375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:34 shuffle-word-380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  528K Aug 24 02:34 shuffle-word-385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  528K Aug 24 02:34 shuffle-word-390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  531K Aug 24 02:34 shuffle-word-395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  321K Aug 24 02:34 shuffle-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  531K Aug 24 02:34 shuffle-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:34 shuffle-word-405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:34 shuffle-word-410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  528K Aug 24 02:34 shuffle-word-420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  531K Aug 24 02:34 shuffle-word-425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  529K Aug 24 02:34 shuffle-word-430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:34 shuffle-word-435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  530K Aug 24 02:34 shuffle-word-440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  530K Aug 24 02:34 shuffle-word-445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  312K Aug 24 02:34 shuffle-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  531K Aug 24 02:34 shuffle-word-450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:34 shuffle-word-455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  529K Aug 24 02:34 shuffle-word-460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:34 shuffle-word-465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:34 shuffle-word-470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:34 shuffle-word-475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  529K Aug 24 02:34 shuffle-word-480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  528K Aug 24 02:34 shuffle-word-485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  528K Aug 24 02:34 shuffle-word-490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:34 shuffle-word-495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  815K Aug 24 02:34 shuffle-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  610K Aug 24 02:34 shuffle-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:34 shuffle-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:34 shuffle-word-505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  530K Aug 24 02:34 shuffle-word-510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:34 shuffle-word-515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  529K Aug 24 02:34 shuffle-word-520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:34 shuffle-word-525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  529K Aug 24 02:34 shuffle-word-530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:34 shuffle-word-535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:34 shuffle-word-540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:34 shuffle-word-545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  596K Aug 24 02:34 shuffle-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  596K Aug 24 02:34 shuffle-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  591K Aug 24 02:34 shuffle-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  593K Aug 24 02:34 shuffle-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  576K Aug 24 02:34 shuffle-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  576K Aug 24 02:34 shuffle-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  576K Aug 24 02:34 shuffle-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  561K Aug 24 02:34 shuffle-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  572K Aug 24 02:34 shuffle-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   48K Aug 24 02:34 word-2-count.jsonl\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%script bash\n",
+    "\n",
+    "########################################\n",
+    "# Generate the required jsonl dataset\n",
+    "########################################\n",
+    "\n",
+    "# Reset the dataset dir\n",
+    "mkdir -p ../dataset\n",
+    "rm -rf ../dataset/*.jsonl\n",
+    "\n",
+    "# Generate the various datasets\n",
+    "echo \"## Generating word reptition dataset ##\"\n",
+    "\n",
+    "#\n",
+    "# We reduce the training set for lower word count - and shift the focus upwards\n",
+    "#\n",
+    "python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-2-count.jsonl 2 400 &\n",
+    "for i in {5..45..5} \n",
+    "do\n",
+    "    python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 400 & \n",
+    "    python ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 10 & \n",
+    "done\n",
+    "\n",
+    "#\n",
+    "# Ramping up the 50 - 550 words dataset\n",
+    "# \n",
+    "for i in {50..550..5} \n",
+    "do\n",
+    "    python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 800 & \n",
+    "    python ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 20 & \n",
+    "done\n",
+    "\n",
+    "wait\n",
+    "echo \"## Done ##\"\n",
+    "\n",
+    "ls -alh ../dataset/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "da84b1a5",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:34:29.942378Z",
+     "iopub.status.busy": "2023-08-24T02:34:29.941857Z",
+     "iopub.status.idle": "2023-08-24T02:34:44.397615Z",
+     "shell.execute_reply": "2023-08-24T02:34:44.396837Z"
+    },
+    "papermill": {
+     "duration": 14.489654,
+     "end_time": "2023-08-24T02:34:44.399883",
+     "exception": false,
+     "start_time": "2023-08-24T02:34:29.910229",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.11/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/v5base-mem-template.yaml', '--trainer.logger.init_args.name=v5-hs2x-L6-D4096-E0.1 - Mem-Tune ctx-1k (train-ctx=1k, deepspeed_stage_2_offload)', '--trainer.strategy=deepspeed_stage_2_offload', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-hs2x-L6-D4096-E0_1-mem-ctx-1k/', '--model.lr_init=4e-4', '--model.lr_final=2e-4', '--data.max_token_size=1024', '--model.ctx_len=1024', '--model.bptt_learning_range=1', '--model.load_model=../model/v5-hs2x-L6-D4096-E0_1-mem-ctx-512.pth'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/v5base-mem-template.yaml', '--trainer.logger.init_args.name=v5-hs2x-L6-D4096-E0.1 - Mem-Tune ctx-1k (train-ctx=1k, deepspeed_stage_2_offload)', '--trainer.strategy=deepspeed_stage_2_offload', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-hs2x-L6-D4096-E0_1-mem-ctx-1k/', '--model.lr_init=4e-4', '--model.lr_final=2e-4', '--data.max_token_size=1024', '--model.ctx_len=1024', '--model.bptt_learning_range=1', '--model.load_model=../model/v5-hs2x-L6-D4096-E0_1-mem-ctx-512.pth'].\r\n",
+      "  rank_zero_warn(\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.11/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 3583079656\r\n",
+      "  rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n",
+      "Global seed set to 3583079656\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: wandb version 0.15.8 is available!  To upgrade, please run:\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m:  $ pip install wandb --upgrade\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.4\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20230824_023435-3l5c5uk1\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mv5-hs2x-L6-D4096-E0.1 - Mem-Tune ctx-1k (train-ctx=1k, deepspeed_stage_2_offload)\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/3l5c5uk1\u001b[0m\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Traceback (most recent call last):\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/lightning_trainer.py\", line 254, in <module>\r\n",
+      "    cli_main()\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/lightning_trainer.py\", line 233, in cli_main\r\n",
+      "    LightningCLI(\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/lightning/pytorch/cli.py\", line 350, in __init__\r\n",
+      "    self.instantiate_classes()\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/lightning/pytorch/cli.py\", line 499, in instantiate_classes\r\n",
+      "    self.config_init = self.parser.instantiate_classes(self.config)\r\n",
+      "                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n",
+      "    cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n",
+      "          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_core.py\", line 1128, in instantiate_classes\r\n",
+      "    cfg[subcommand] = subparser.instantiate_classes(cfg[subcommand], instantiate_groups=instantiate_groups)\r\n",
+      "                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n",
+      "    cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n",
+      "          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_core.py\", line 1122, in instantiate_classes\r\n",
+      "    component.instantiate_class(component, cfg)\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_signatures.py\", line 551, in group_instantiate_class\r\n",
+      "    parent[key] = group.group_class(**value)\r\n",
+      "                  ^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/src/model.py\", line 553, in __init__\r\n",
+      "    raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n",
+      "ValueError: load_model file '../model/v5-hs2x-L6-D4096-E0_1-mem-ctx-512.pth' does not exist\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5-hs2x-L6-D4096-E0.1 - Mem-Tune ctx-1k (train-ctx=1k, deepspeed_stage_2_offload)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/3l5c5uk1\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230824_023435-3l5c5uk1/logs\u001b[0m\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Start the finetune model training\n",
+    "!cd \"{TRAINER_DIR}\" && \\\n",
+    "    export WANDB_MODE=\"{WANDB_MODE}\" && \\\n",
+    "    export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
+    "    python lightning_trainer.py fit \\\n",
+    "        -c \"{NOTEBOOK_DIR}/v5base-mem-template.yaml\" \\\n",
+    "        --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Tune ctx-1k (train-ctx=1k, {DEEPSPEED_STRAT})\" \\\n",
+    "        --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n",
+    "        --trainer.devices=\"{GPU_DEVICES}\"  \\\n",
+    "        --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-mem-ctx-1k/\" \\\n",
+    "        --model.lr_init=4e-4 \\\n",
+    "        --model.lr_final=2e-4 \\\n",
+    "        --data.max_token_size=1024 \\\n",
+    "        --model.ctx_len=1024 \\\n",
+    "        --model.bptt_learning_range=1 \\\n",
+    "        --model.load_model=\"../model/{FILENAME_PREFIX}-mem-ctx-512.pth\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "53fcb781",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:34:44.464877Z",
+     "iopub.status.busy": "2023-08-24T02:34:44.464691Z",
+     "iopub.status.idle": "2023-08-24T02:34:47.142623Z",
+     "shell.execute_reply": "2023-08-24T02:34:47.142120Z"
+    },
+    "papermill": {
+     "duration": 2.711863,
+     "end_time": "2023-08-24T02:34:47.144554",
+     "exception": false,
+     "start_time": "2023-08-24T02:34:44.432691",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Traceback (most recent call last):\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/export_checkpoint.py\", line 651, in <module>\r\n",
+      "    convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n",
+      "    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n",
+      "                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n",
+      "    raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n",
+      "ValueError: Unable to find 'latest' file at ../checkpoint/v5-hs2x-L6-D4096-E0_1-mem-ctx-1k/last.ckpt/latest\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ls: cannot access '../model/v5-hs2x-L6-D4096-E0_1-mem-ctx-1k.pth': No such file or directory\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Let's export the model from the checkpoint\n",
+    "!cd \"{TRAINER_DIR}\" && \\\n",
+    "    python export_checkpoint.py \\\n",
+    "        \"../checkpoint/{FILENAME_PREFIX}-mem-ctx-1k/last.ckpt\" \\\n",
+    "        \"../model/{FILENAME_PREFIX}-mem-ctx-1k.pth\" \"bf16\"\n",
+    "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-mem-ctx-1k.pth\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "221545d4",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:34:47.210487Z",
+     "iopub.status.busy": "2023-08-24T02:34:47.210312Z",
+     "iopub.status.idle": "2023-08-24T02:34:51.959768Z",
+     "shell.execute_reply": "2023-08-24T02:34:51.958919Z"
+    },
+    "papermill": {
+     "duration": 4.783825,
+     "end_time": "2023-08-24T02:34:51.961425",
+     "exception": false,
+     "start_time": "2023-08-24T02:34:47.177600",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
+      "Traceback (most recent call last):\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/../memory_script/eval_v5_memory_guided.py\", line 366, in <module>\r\n",
+      "    asyncio.run(main_function())\r\n",
+      "  File \"/usr/lib/python3.11/asyncio/runners.py\", line 190, in run\r\n",
+      "    return runner.run(main)\r\n",
+      "           ^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/lib/python3.11/asyncio/runners.py\", line 118, in run\r\n",
+      "    return self._loop.run_until_complete(task)\r\n",
+      "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/lib/python3.11/asyncio/base_events.py\", line 653, in run_until_complete\r\n",
+      "    return future.result()\r\n",
+      "           ^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/../memory_script/eval_v5_memory_guided.py\", line 58, in main_function\r\n",
+      "    model = SimpleRWKV(model_path, device=\"cuda\")\r\n",
+      "            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 1378, in __init__\r\n",
+      "    self.model = RWKV(**model_config)\r\n",
+      "                 ^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 553, in __init__\r\n",
+      "    raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n",
+      "ValueError: load_model file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/model/v5-hs2x-L6-D4096-E0_1-mem-ctx-1k.pth' does not exist\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Let's do a quick memory test\n",
+    "!export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
+    "        python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-1k.pth\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bef7d620",
+   "metadata": {
+    "papermill": {
+     "duration": 0.031277,
+     "end_time": "2023-08-24T02:34:52.026070",
+     "exception": false,
+     "start_time": "2023-08-24T02:34:51.994793",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "## Tune 4 : Low ctx size (2048), memory training\n",
+    "\n",
+    "- Tune 4: Low ctx size (2048), scaling up!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "a232c2bd",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:34:52.090318Z",
+     "iopub.status.busy": "2023-08-24T02:34:52.090035Z",
+     "iopub.status.idle": "2023-08-24T02:34:53.772688Z",
+     "shell.execute_reply": "2023-08-24T02:34:53.771784Z"
+    },
+    "papermill": {
+     "duration": 1.834737,
+     "end_time": "2023-08-24T02:34:53.892090",
+     "exception": false,
+     "start_time": "2023-08-24T02:34:52.057353",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "## Generating word reptition dataset ##\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 76 samples (1 token repeat) - 35 max words - at ../dataset/shuffle-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 40 max words, 100 samples - at ../dataset/gen-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 133 samples (1 token repeat) - 20 max words - at ../dataset/shuffle-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 20 max words, 100 samples - at ../dataset/gen-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 55 max words, 100 samples - at ../dataset/gen-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 262 samples (1 token repeat) - 10 max words - at ../dataset/shuffle-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 30 max words, 100 samples - at ../dataset/gen-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 25 max words, 100 samples - at ../dataset/gen-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 555 samples (1 token repeat) - 5 max words - at ../dataset/shuffle-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 45 max words, 100 samples - at ../dataset/gen-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 64 samples (1 token repeat) - 40 max words - at ../dataset/shuffle-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 45 samples (1 token repeat) - 60 max words - at ../dataset/shuffle-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (1 token repeat) - 45 max words - at ../dataset/shuffle-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 47 samples (1 token repeat) - 55 max words - at ../dataset/shuffle-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 60 max words, 100 samples - at ../dataset/gen-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 35 max words, 100 samples - at ../dataset/gen-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 41 samples (1 token repeat) - 65 max words - at ../dataset/shuffle-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 90 samples (1 token repeat) - 30 max words - at ../dataset/shuffle-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 50 max words, 100 samples - at ../dataset/gen-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 108 samples (1 token repeat) - 25 max words - at ../dataset/shuffle-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 10 max words, 100 samples - at ../dataset/gen-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 15 max words, 100 samples - at ../dataset/gen-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 173 samples (1 token repeat) - 15 max words - at ../dataset/shuffle-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 65 max words, 100 samples - at ../dataset/gen-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2 max words, 100 samples - at ../dataset/word-2-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 29 samples (1 token repeat) - 90 max words - at ../dataset/shuffle-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 53 samples (1 token repeat) - 50 max words - at ../dataset/shuffle-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5 max words, 100 samples - at ../dataset/gen-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 75 max words, 100 samples - at ../dataset/gen-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 90 max words, 100 samples - at ../dataset/gen-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 180 max words, 200 samples - at ../dataset/gen-word-180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 155 max words, 200 samples - at ../dataset/gen-word-155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 70 max words, 100 samples - at ../dataset/gen-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 39 samples (1 token repeat) - 70 max words - at ../dataset/shuffle-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 160 max words, 200 samples - at ../dataset/gen-word-160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 95 max words, 100 samples - at ../dataset/gen-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 34 samples (1 token repeat) - 75 max words - at ../dataset/shuffle-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 85 max words, 100 samples - at ../dataset/gen-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 33 samples (1 token repeat) - 80 max words - at ../dataset/shuffle-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 130 max words, 200 samples - at ../dataset/gen-word-130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 270 samples (20 token repeat) - 190 max words - at ../dataset/shuffle-word-190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 26 samples (1 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 408 samples (20 token repeat) - 105 max words - at ../dataset/shuffle-word-105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 200 max words, 200 samples - at ../dataset/gen-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 205 max words, 200 samples - at ../dataset/gen-word-205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 145 max words, 200 samples - at ../dataset/gen-word-145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 140 samples (20 token repeat) - 385 max words - at ../dataset/shuffle-word-385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 285 max words, 200 samples - at ../dataset/gen-word-285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 300 max words, 200 samples - at ../dataset/gen-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 120 samples (20 token repeat) - 440 max words - at ../dataset/shuffle-word-440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 82 samples (20 token repeat) - 630 max words - at ../dataset/shuffle-word-630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 195 samples (20 token repeat) - 235 max words - at ../dataset/shuffle-word-235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 81 samples (20 token repeat) - 615 max words - at ../dataset/shuffle-word-615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 310 max words, 200 samples - at ../dataset/gen-word-310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 117 samples (20 token repeat) - 450 max words - at ../dataset/shuffle-word-450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 274 samples (20 token repeat) - 195 max words - at ../dataset/shuffle-word-195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 141 samples (20 token repeat) - 350 max words - at ../dataset/shuffle-word-350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 291 samples (20 token repeat) - 160 max words - at ../dataset/shuffle-word-160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 520 max words - at ../dataset/shuffle-word-520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 220 max words, 200 samples - at ../dataset/gen-word-220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 525 max words, 200 samples - at ../dataset/gen-word-525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 735 max words - at ../dataset/shuffle-word-735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 27 samples (1 token repeat) - 95 max words - at ../dataset/shuffle-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 98 samples (20 token repeat) - 590 max words - at ../dataset/shuffle-word-590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 615 max words, 200 samples - at ../dataset/gen-word-615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 120 max words, 200 samples - at ../dataset/gen-word-120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 328 samples (20 token repeat) - 140 max words - at ../dataset/shuffle-word-140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 565 max words - at ../dataset/shuffle-word-565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 80 max words, 100 samples - at ../dataset/gen-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 277 samples (20 token repeat) - 175 max words - at ../dataset/shuffle-word-175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 585 max words - at ../dataset/shuffle-word-585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 555 max words - at ../dataset/shuffle-word-555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 195 max words, 200 samples - at ../dataset/gen-word-195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 150 max words, 200 samples - at ../dataset/gen-word-150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 301 samples (20 token repeat) - 155 max words - at ../dataset/shuffle-word-155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 265 max words, 200 samples - at ../dataset/gen-word-265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 140 samples (20 token repeat) - 345 max words - at ../dataset/shuffle-word-345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 450 max words, 200 samples - at ../dataset/gen-word-450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 140 samples (20 token repeat) - 390 max words - at ../dataset/shuffle-word-390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 460 max words, 200 samples - at ../dataset/gen-word-460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 275 max words, 200 samples - at ../dataset/gen-word-275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 290 samples (20 token repeat) - 165 max words - at ../dataset/shuffle-word-165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 110 max words, 200 samples - at ../dataset/gen-word-110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 115 max words, 200 samples - at ../dataset/gen-word-115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 345 samples (20 token repeat) - 130 max words - at ../dataset/shuffle-word-130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 181 samples (20 token repeat) - 260 max words - at ../dataset/shuffle-word-260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 213 samples (20 token repeat) - 210 max words - at ../dataset/shuffle-word-210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 82 samples (20 token repeat) - 620 max words - at ../dataset/shuffle-word-620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 185 samples (20 token repeat) - 265 max words - at ../dataset/shuffle-word-265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 115 samples (20 token repeat) - 485 max words - at ../dataset/shuffle-word-485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 142 samples (20 token repeat) - 325 max words - at ../dataset/shuffle-word-325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 630 max words, 200 samples - at ../dataset/gen-word-630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 640 max words - at ../dataset/shuffle-word-640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 192 samples (20 token repeat) - 245 max words - at ../dataset/shuffle-word-245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 105 max words, 200 samples - at ../dataset/gen-word-105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 315 samples (20 token repeat) - 145 max words - at ../dataset/shuffle-word-145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 565 max words, 200 samples - at ../dataset/gen-word-565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 510 max words - at ../dataset/shuffle-word-510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 530 max words - at ../dataset/shuffle-word-530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 165 max words, 200 samples - at ../dataset/gen-word-165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 255 max words, 200 samples - at ../dataset/gen-word-255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 335 max words, 200 samples - at ../dataset/gen-word-335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 410 max words, 200 samples - at ../dataset/gen-word-410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 600 max words - at ../dataset/shuffle-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 204 samples (20 token repeat) - 215 max words - at ../dataset/shuffle-word-215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 139 samples (20 token repeat) - 370 max words - at ../dataset/shuffle-word-370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 515 max words, 200 samples - at ../dataset/gen-word-515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 340 max words, 200 samples - at ../dataset/gen-word-340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 175 max words, 200 samples - at ../dataset/gen-word-175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 360 max words, 200 samples - at ../dataset/gen-word-360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 225 max words, 200 samples - at ../dataset/gen-word-225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 217 samples (20 token repeat) - 205 max words - at ../dataset/shuffle-word-205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 290 max words, 200 samples - at ../dataset/gen-word-290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 550 max words - at ../dataset/shuffle-word-550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 302 samples (20 token repeat) - 150 max words - at ../dataset/shuffle-word-150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 585 max words, 200 samples - at ../dataset/gen-word-585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 215 max words, 200 samples - at ../dataset/gen-word-215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 665 max words - at ../dataset/shuffle-word-665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 375 max words, 200 samples - at ../dataset/gen-word-375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 79 samples (20 token repeat) - 725 max words - at ../dataset/shuffle-word-725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 140 samples (20 token repeat) - 360 max words - at ../dataset/shuffle-word-360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 705 max words - at ../dataset/shuffle-word-705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 330 max words, 200 samples - at ../dataset/gen-word-330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 359 samples (20 token repeat) - 125 max words - at ../dataset/shuffle-word-125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 64 samples (20 token repeat) - 830 max words - at ../dataset/shuffle-word-830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 600 max words, 200 samples - at ../dataset/gen-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 118 samples (20 token repeat) - 425 max words - at ../dataset/shuffle-word-425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 535 max words - at ../dataset/shuffle-word-535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 118 samples (20 token repeat) - 470 max words - at ../dataset/shuffle-word-470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 140 samples (20 token repeat) - 380 max words - at ../dataset/shuffle-word-380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 270 max words, 200 samples - at ../dataset/gen-word-270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 140 samples (20 token repeat) - 340 max words - at ../dataset/shuffle-word-340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 445 max words, 200 samples - at ../dataset/gen-word-445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 730 max words - at ../dataset/shuffle-word-730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 269 samples (20 token repeat) - 200 max words - at ../dataset/shuffle-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 350 max words, 200 samples - at ../dataset/gen-word-350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 116 samples (20 token repeat) - 490 max words - at ../dataset/shuffle-word-490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 185 samples (20 token repeat) - 275 max words - at ../dataset/shuffle-word-275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 140 max words, 200 samples - at ../dataset/gen-word-140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 117 samples (20 token repeat) - 475 max words - at ../dataset/shuffle-word-475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 400 max words, 200 samples - at ../dataset/gen-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 184 samples (20 token repeat) - 255 max words - at ../dataset/shuffle-word-255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 190 max words, 200 samples - at ../dataset/gen-word-190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 120 samples (20 token repeat) - 445 max words - at ../dataset/shuffle-word-445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 120 samples (20 token repeat) - 405 max words - at ../dataset/shuffle-word-405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 115 samples (20 token repeat) - 460 max words - at ../dataset/shuffle-word-460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 182 samples (20 token repeat) - 285 max words - at ../dataset/shuffle-word-285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 405 max words, 200 samples - at ../dataset/gen-word-405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 185 max words, 200 samples - at ../dataset/gen-word-185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 125 max words, 200 samples - at ../dataset/gen-word-125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 144 samples (20 token repeat) - 310 max words - at ../dataset/shuffle-word-310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 145 samples (20 token repeat) - 315 max words - at ../dataset/shuffle-word-315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 430 max words, 200 samples - at ../dataset/gen-word-430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 230 max words, 200 samples - at ../dataset/gen-word-230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 119 samples (20 token repeat) - 420 max words - at ../dataset/shuffle-word-420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 305 max words, 200 samples - at ../dataset/gen-word-305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 144 samples (20 token repeat) - 305 max words - at ../dataset/shuffle-word-305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 82 samples (20 token repeat) - 660 max words - at ../dataset/shuffle-word-660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 325 max words, 200 samples - at ../dataset/gen-word-325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 333 samples (20 token repeat) - 135 max words - at ../dataset/shuffle-word-135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 201 samples (20 token repeat) - 220 max words - at ../dataset/shuffle-word-220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 385 max words, 200 samples - at ../dataset/gen-word-385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 193 samples (20 token repeat) - 240 max words - at ../dataset/shuffle-word-240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 139 samples (20 token repeat) - 395 max words - at ../dataset/shuffle-word-395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 920 max words - at ../dataset/shuffle-word-920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 730 max words, 200 samples - at ../dataset/gen-word-730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 480 max words, 200 samples - at ../dataset/gen-word-480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 610 max words - at ../dataset/shuffle-word-610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 710 max words - at ../dataset/shuffle-word-710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 276 samples (20 token repeat) - 180 max words - at ../dataset/shuffle-word-180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 115 samples (20 token repeat) - 465 max words - at ../dataset/shuffle-word-465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 740 max words - at ../dataset/shuffle-word-740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 183 samples (20 token repeat) - 270 max words - at ../dataset/shuffle-word-270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 550 max words, 200 samples - at ../dataset/gen-word-550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 119 samples (20 token repeat) - 430 max words - at ../dataset/shuffle-word-430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 29 samples (1 token repeat) - 85 max words - at ../dataset/shuffle-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 140 samples (20 token repeat) - 335 max words - at ../dataset/shuffle-word-335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 635 max words - at ../dataset/shuffle-word-635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 250 max words, 200 samples - at ../dataset/gen-word-250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 116 samples (20 token repeat) - 480 max words - at ../dataset/shuffle-word-480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 98 samples (20 token repeat) - 580 max words - at ../dataset/shuffle-word-580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 185 samples (20 token repeat) - 295 max words - at ../dataset/shuffle-word-295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 605 max words - at ../dataset/shuffle-word-605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 119 samples (20 token repeat) - 435 max words - at ../dataset/shuffle-word-435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 315 max words, 200 samples - at ../dataset/gen-word-315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 240 max words, 200 samples - at ../dataset/gen-word-240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 140 samples (20 token repeat) - 355 max words - at ../dataset/shuffle-word-355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 170 max words, 200 samples - at ../dataset/gen-word-170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 570 max words - at ../dataset/shuffle-word-570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 345 max words, 200 samples - at ../dataset/gen-word-345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 382 samples (20 token repeat) - 115 max words - at ../dataset/shuffle-word-115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 142 samples (20 token repeat) - 320 max words - at ../dataset/shuffle-word-320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 183 samples (20 token repeat) - 290 max words - at ../dataset/shuffle-word-290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 135 max words, 200 samples - at ../dataset/gen-word-135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 140 samples (20 token repeat) - 375 max words - at ../dataset/shuffle-word-375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 140 samples (20 token repeat) - 330 max words - at ../dataset/shuffle-word-330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 595 max words - at ../dataset/shuffle-word-595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1010 max words - at ../dataset/shuffle-word-1010-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 280 max words, 200 samples - at ../dataset/gen-word-280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 765 max words - at ../dataset/shuffle-word-765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 555 max words, 200 samples - at ../dataset/gen-word-555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 670 max words - at ../dataset/shuffle-word-670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 420 max words, 200 samples - at ../dataset/gen-word-420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 515 max words - at ../dataset/shuffle-word-515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 435 max words, 200 samples - at ../dataset/gen-word-435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 650 max words - at ../dataset/shuffle-word-650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 760 max words - at ../dataset/shuffle-word-760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 59 samples (20 token repeat) - 1105 max words - at ../dataset/shuffle-word-1105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 320 max words, 200 samples - at ../dataset/gen-word-320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 100 max words, 100 samples - at ../dataset/gen-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 485 max words, 200 samples - at ../dataset/gen-word-485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 540 max words - at ../dataset/shuffle-word-540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 118 samples (20 token repeat) - 495 max words - at ../dataset/shuffle-word-495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 680 max words, 200 samples - at ../dataset/gen-word-680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 570 max words, 200 samples - at ../dataset/gen-word-570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 78 samples (20 token repeat) - 750 max words - at ../dataset/shuffle-word-750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 139 samples (20 token repeat) - 400 max words - at ../dataset/shuffle-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 790 max words, 200 samples - at ../dataset/gen-word-790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 645 max words - at ../dataset/shuffle-word-645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 715 max words - at ../dataset/shuffle-word-715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 79 samples (20 token repeat) - 795 max words - at ../dataset/shuffle-word-795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 965 max words - at ../dataset/shuffle-word-965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 575 max words, 200 samples - at ../dataset/gen-word-575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 755 max words - at ../dataset/shuffle-word-755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1565 max words - at ../dataset/shuffle-word-1565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 700 max words, 200 samples - at ../dataset/gen-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 81 samples (20 token repeat) - 685 max words - at ../dataset/shuffle-word-685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 67 samples (20 token repeat) - 860 max words - at ../dataset/shuffle-word-860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 79 samples (20 token repeat) - 770 max words - at ../dataset/shuffle-word-770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 490 max words, 200 samples - at ../dataset/gen-word-490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 365 max words, 200 samples - at ../dataset/gen-word-365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 278 samples (20 token repeat) - 185 max words - at ../dataset/shuffle-word-185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 500 max words, 200 samples - at ../dataset/gen-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 680 max words - at ../dataset/shuffle-word-680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 610 max words, 200 samples - at ../dataset/gen-word-610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 545 max words - at ../dataset/shuffle-word-545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 62 samples (20 token repeat) - 825 max words - at ../dataset/shuffle-word-825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 535 max words, 200 samples - at ../dataset/gen-word-535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 370 max words, 200 samples - at ../dataset/gen-word-370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 580 max words, 200 samples - at ../dataset/gen-word-580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 41 samples (20 token repeat) - 1380 max words - at ../dataset/shuffle-word-1380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 183 samples (20 token repeat) - 300 max words - at ../dataset/shuffle-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1035 max words - at ../dataset/shuffle-word-1035-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 415 max words, 200 samples - at ../dataset/gen-word-415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1420 max words, 200 samples - at ../dataset/gen-word-1420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 440 max words, 200 samples - at ../dataset/gen-word-440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 199 samples (20 token repeat) - 230 max words - at ../dataset/shuffle-word-230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1490 max words - at ../dataset/shuffle-word-1490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 397 samples (20 token repeat) - 110 max words - at ../dataset/shuffle-word-110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 64 samples (20 token repeat) - 805 max words - at ../dataset/shuffle-word-805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 395 max words, 200 samples - at ../dataset/gen-word-395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 295 max words, 200 samples - at ../dataset/gen-word-295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1500 max words - at ../dataset/shuffle-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1430 max words - at ../dataset/shuffle-word-1430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 925 max words - at ../dataset/shuffle-word-925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 245 max words, 200 samples - at ../dataset/gen-word-245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 380 max words, 200 samples - at ../dataset/gen-word-380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 505 max words, 200 samples - at ../dataset/gen-word-505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 495 max words, 200 samples - at ../dataset/gen-word-495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 63 samples (20 token repeat) - 855 max words - at ../dataset/shuffle-word-855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 78 samples (20 token repeat) - 785 max words - at ../dataset/shuffle-word-785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 625 max words, 200 samples - at ../dataset/gen-word-625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 590 max words, 200 samples - at ../dataset/gen-word-590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 190 samples (20 token repeat) - 250 max words - at ../dataset/shuffle-word-250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 280 samples (20 token repeat) - 170 max words - at ../dataset/shuffle-word-170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 455 max words, 200 samples - at ../dataset/gen-word-455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 940 max words - at ../dataset/shuffle-word-940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 510 max words, 200 samples - at ../dataset/gen-word-510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 700 max words - at ../dataset/shuffle-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 775 max words - at ../dataset/shuffle-word-775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 141 samples (20 token repeat) - 365 max words - at ../dataset/shuffle-word-365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 530 max words, 200 samples - at ../dataset/gen-word-530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1085 max words - at ../dataset/shuffle-word-1085-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1765 max words - at ../dataset/shuffle-word-1765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 800 max words - at ../dataset/shuffle-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 850 max words - at ../dataset/shuffle-word-850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 53 samples (20 token repeat) - 1265 max words - at ../dataset/shuffle-word-1265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 605 max words, 200 samples - at ../dataset/gen-word-605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 53 samples (20 token repeat) - 1270 max words - at ../dataset/shuffle-word-1270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 520 max words, 200 samples - at ../dataset/gen-word-520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1445 max words - at ../dataset/shuffle-word-1445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1310 max words - at ../dataset/shuffle-word-1310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1555 max words - at ../dataset/shuffle-word-1555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1415 max words - at ../dataset/shuffle-word-1415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 81 samples (20 token repeat) - 690 max words - at ../dataset/shuffle-word-690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 260 max words, 200 samples - at ../dataset/gen-word-260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1400 max words - at ../dataset/shuffle-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 645 max words, 200 samples - at ../dataset/gen-word-645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 675 max words, 200 samples - at ../dataset/gen-word-675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1605 max words - at ../dataset/shuffle-word-1605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 235 max words, 200 samples - at ../dataset/gen-word-235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 545 max words, 200 samples - at ../dataset/gen-word-545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1620 max words - at ../dataset/shuffle-word-1620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 820 max words, 200 samples - at ../dataset/gen-word-820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1060 max words - at ../dataset/shuffle-word-1060-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 620 max words, 200 samples - at ../dataset/gen-word-620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 475 max words, 200 samples - at ../dataset/gen-word-475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 725 max words, 200 samples - at ../dataset/gen-word-725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 99 samples (20 token repeat) - 505 max words - at ../dataset/shuffle-word-505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1005 max words, 200 samples - at ../dataset/gen-word-1005-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 745 max words, 200 samples - at ../dataset/gen-word-745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 915 max words - at ../dataset/shuffle-word-915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 50 samples (20 token repeat) - 1255 max words - at ../dataset/shuffle-word-1255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1540 max words, 200 samples - at ../dataset/gen-word-1540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 355 max words, 200 samples - at ../dataset/gen-word-355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1005 max words - at ../dataset/shuffle-word-1005-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 720 max words - at ../dataset/shuffle-word-720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1530 max words - at ../dataset/shuffle-word-1530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 930 max words - at ../dataset/shuffle-word-930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 65 samples (20 token repeat) - 810 max words - at ../dataset/shuffle-word-810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1700 max words - at ../dataset/shuffle-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 62 samples (20 token repeat) - 815 max words - at ../dataset/shuffle-word-815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1815 max words - at ../dataset/shuffle-word-1815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 575 max words - at ../dataset/shuffle-word-575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 49 samples (20 token repeat) - 1250 max words - at ../dataset/shuffle-word-1250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 66 samples (20 token repeat) - 890 max words - at ../dataset/shuffle-word-890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 905 max words - at ../dataset/shuffle-word-905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 41 samples (20 token repeat) - 1305 max words - at ../dataset/shuffle-word-1305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1635 max words - at ../dataset/shuffle-word-1635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 425 max words, 200 samples - at ../dataset/gen-word-425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 540 max words, 200 samples - at ../dataset/gen-word-540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1880 max words - at ../dataset/shuffle-word-1880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 64 samples (20 token repeat) - 820 max words - at ../dataset/shuffle-word-820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 560 max words - at ../dataset/shuffle-word-560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1090 max words - at ../dataset/shuffle-word-1090-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 210 max words, 200 samples - at ../dataset/gen-word-210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 65 samples (20 token repeat) - 835 max words - at ../dataset/shuffle-word-835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 390 max words, 200 samples - at ../dataset/gen-word-390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1320 max words - at ../dataset/shuffle-word-1320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 865 max words, 200 samples - at ../dataset/gen-word-865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1050 max words - at ../dataset/shuffle-word-1050-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 41 samples (20 token repeat) - 1375 max words - at ../dataset/shuffle-word-1375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1715 max words - at ../dataset/shuffle-word-1715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 710 max words, 200 samples - at ../dataset/gen-word-710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1455 max words - at ../dataset/shuffle-word-1455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1175 max words, 200 samples - at ../dataset/gen-word-1175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1740 max words - at ../dataset/shuffle-word-1740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1985 max words - at ../dataset/shuffle-word-1985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1995 max words - at ../dataset/shuffle-word-1995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1685 max words - at ../dataset/shuffle-word-1685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 41 samples (20 token repeat) - 1315 max words - at ../dataset/shuffle-word-1315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1440 max words - at ../dataset/shuffle-word-1440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 790 max words - at ../dataset/shuffle-word-790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1855 max words - at ../dataset/shuffle-word-1855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1950 max words - at ../dataset/shuffle-word-1950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 780 max words - at ../dataset/shuffle-word-780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 47 samples (20 token repeat) - 1280 max words - at ../dataset/shuffle-word-1280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 980 max words - at ../dataset/shuffle-word-980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1450 max words - at ../dataset/shuffle-word-1450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 470 max words, 200 samples - at ../dataset/gen-word-470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 59 samples (20 token repeat) - 1120 max words - at ../dataset/shuffle-word-1120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1500 max words, 200 samples - at ../dataset/gen-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 635 max words, 200 samples - at ../dataset/gen-word-635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1465 max words - at ../dataset/shuffle-word-1465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 795 max words, 200 samples - at ../dataset/gen-word-795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1325 max words - at ../dataset/shuffle-word-1325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1555 max words, 200 samples - at ../dataset/gen-word-1555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 815 max words, 200 samples - at ../dataset/gen-word-815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1040 max words - at ../dataset/shuffle-word-1040-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 595 max words, 200 samples - at ../dataset/gen-word-595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 50 samples (20 token repeat) - 1275 max words - at ../dataset/shuffle-word-1275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 810 max words, 200 samples - at ../dataset/gen-word-810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 780 max words, 200 samples - at ../dataset/gen-word-780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1435 max words - at ../dataset/shuffle-word-1435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1110 max words - at ../dataset/shuffle-word-1110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1140 max words, 200 samples - at ../dataset/gen-word-1140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1560 max words, 200 samples - at ../dataset/gen-word-1560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 81 samples (20 token repeat) - 655 max words - at ../dataset/shuffle-word-655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 49 samples (20 token repeat) - 1220 max words - at ../dataset/shuffle-word-1220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 670 max words, 200 samples - at ../dataset/gen-word-670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1405 max words - at ../dataset/shuffle-word-1405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1905 max words - at ../dataset/shuffle-word-1905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 63 samples (20 token repeat) - 865 max words - at ../dataset/shuffle-word-865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 46 samples (20 token repeat) - 1205 max words - at ../dataset/shuffle-word-1205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 560 max words, 200 samples - at ../dataset/gen-word-560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1615 max words - at ../dataset/shuffle-word-1615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 48 samples (20 token repeat) - 1295 max words - at ../dataset/shuffle-word-1295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1840 max words - at ../dataset/shuffle-word-1840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1330 max words - at ../dataset/shuffle-word-1330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 690 max words, 200 samples - at ../dataset/gen-word-690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 46 samples (20 token repeat) - 1230 max words - at ../dataset/shuffle-word-1230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1015 max words - at ../dataset/shuffle-word-1015-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 960 max words - at ../dataset/shuffle-word-960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 49 samples (20 token repeat) - 1260 max words - at ../dataset/shuffle-word-1260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1915 max words - at ../dataset/shuffle-word-1915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1160 max words - at ../dataset/shuffle-word-1160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1055 max words, 200 samples - at ../dataset/gen-word-1055-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1680 max words - at ../dataset/shuffle-word-1680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1830 max words - at ../dataset/shuffle-word-1830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1930 max words - at ../dataset/shuffle-word-1930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 53 samples (20 token repeat) - 1225 max words - at ../dataset/shuffle-word-1225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 118 samples (20 token repeat) - 500 max words - at ../dataset/shuffle-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 800 max words, 200 samples - at ../dataset/gen-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 885 max words, 200 samples - at ../dataset/gen-word-885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 895 max words, 200 samples - at ../dataset/gen-word-895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1020 max words - at ../dataset/shuffle-word-1020-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 860 max words, 200 samples - at ../dataset/gen-word-860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1810 max words, 200 samples - at ../dataset/gen-word-1810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 755 max words, 200 samples - at ../dataset/gen-word-755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 59 samples (20 token repeat) - 1175 max words - at ../dataset/shuffle-word-1175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 650 max words, 200 samples - at ../dataset/gen-word-650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1390 max words, 200 samples - at ../dataset/gen-word-1390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 910 max words, 200 samples - at ../dataset/gen-word-910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1610 max words - at ../dataset/shuffle-word-1610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1795 max words, 200 samples - at ../dataset/gen-word-1795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1305 max words, 200 samples - at ../dataset/gen-word-1305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 935 max words, 200 samples - at ../dataset/gen-word-935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2000 max words - at ../dataset/shuffle-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1020 max words, 200 samples - at ../dataset/gen-word-1020-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 525 max words - at ../dataset/shuffle-word-525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 735 max words, 200 samples - at ../dataset/gen-word-735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 720 max words, 200 samples - at ../dataset/gen-word-720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1085 max words, 200 samples - at ../dataset/gen-word-1085-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 705 max words, 200 samples - at ../dataset/gen-word-705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1800 max words - at ../dataset/shuffle-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 183 samples (20 token repeat) - 280 max words - at ../dataset/shuffle-word-280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1845 max words, 200 samples - at ../dataset/gen-word-1845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1510 max words - at ../dataset/shuffle-word-1510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1950 max words, 200 samples - at ../dataset/gen-word-1950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1285 max words, 200 samples - at ../dataset/gen-word-1285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1820 max words - at ../dataset/shuffle-word-1820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1695 max words - at ../dataset/shuffle-word-1695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1130 max words, 200 samples - at ../dataset/gen-word-1130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 52 samples (20 token repeat) - 1285 max words - at ../dataset/shuffle-word-1285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1000 max words, 200 samples - at ../dataset/gen-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 950 max words - at ../dataset/shuffle-word-950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 985 max words - at ../dataset/shuffle-word-985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 975 max words - at ../dataset/shuffle-word-975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1470 max words - at ../dataset/shuffle-word-1470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1540 max words - at ../dataset/shuffle-word-1540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1010 max words, 200 samples - at ../dataset/gen-word-1010-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1065 max words - at ../dataset/shuffle-word-1065-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1410 max words - at ../dataset/shuffle-word-1410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1640 max words - at ../dataset/shuffle-word-1640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1535 max words - at ../dataset/shuffle-word-1535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 62 samples (20 token repeat) - 845 max words - at ../dataset/shuffle-word-845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1200 max words - at ../dataset/shuffle-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 41 samples (20 token repeat) - 1340 max words - at ../dataset/shuffle-word-1340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1075 max words, 200 samples - at ../dataset/gen-word-1075-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 48 samples (20 token repeat) - 1215 max words - at ../dataset/shuffle-word-1215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1025 max words, 200 samples - at ../dataset/gen-word-1025-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 640 max words, 200 samples - at ../dataset/gen-word-640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 59 samples (20 token repeat) - 1140 max words - at ../dataset/shuffle-word-1140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1650 max words - at ../dataset/shuffle-word-1650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 120 samples (20 token repeat) - 410 max words - at ../dataset/shuffle-word-410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1580 max words - at ../dataset/shuffle-word-1580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1515 max words - at ../dataset/shuffle-word-1515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1625 max words - at ../dataset/shuffle-word-1625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1385 max words - at ../dataset/shuffle-word-1385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 855 max words, 200 samples - at ../dataset/gen-word-855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 118 samples (20 token repeat) - 415 max words - at ../dataset/shuffle-word-415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1190 max words - at ../dataset/shuffle-word-1190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1660 max words - at ../dataset/shuffle-word-1660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1940 max words - at ../dataset/shuffle-word-1940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1060 max words, 200 samples - at ../dataset/gen-word-1060-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 51 samples (20 token repeat) - 1210 max words - at ../dataset/shuffle-word-1210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 41 samples (20 token repeat) - 1370 max words - at ../dataset/shuffle-word-1370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1525 max words - at ../dataset/shuffle-word-1525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1025 max words - at ../dataset/shuffle-word-1025-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 370 samples (20 token repeat) - 120 max words - at ../dataset/shuffle-word-120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 970 max words, 200 samples - at ../dataset/gen-word-970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 745 max words - at ../dataset/shuffle-word-745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1415 max words, 200 samples - at ../dataset/gen-word-1415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1265 max words, 200 samples - at ../dataset/gen-word-1265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1045 max words - at ../dataset/shuffle-word-1045-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1100 max words, 200 samples - at ../dataset/gen-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 825 max words, 200 samples - at ../dataset/gen-word-825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 465 max words, 200 samples - at ../dataset/gen-word-465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1030 max words - at ../dataset/shuffle-word-1030-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 830 max words, 200 samples - at ../dataset/gen-word-830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 740 max words, 200 samples - at ../dataset/gen-word-740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 750 max words, 200 samples - at ../dataset/gen-word-750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1115 max words, 200 samples - at ../dataset/gen-word-1115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 66 samples (20 token repeat) - 840 max words - at ../dataset/shuffle-word-840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1180 max words, 200 samples - at ../dataset/gen-word-1180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 970 max words - at ../dataset/shuffle-word-970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1515 max words, 200 samples - at ../dataset/gen-word-1515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 925 max words, 200 samples - at ../dataset/gen-word-925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 845 max words, 200 samples - at ../dataset/gen-word-845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1360 max words - at ../dataset/shuffle-word-1360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1075 max words - at ../dataset/shuffle-word-1075-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 900 max words, 200 samples - at ../dataset/gen-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 920 max words, 200 samples - at ../dataset/gen-word-920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1345 max words, 200 samples - at ../dataset/gen-word-1345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 665 max words, 200 samples - at ../dataset/gen-word-665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1655 max words - at ../dataset/shuffle-word-1655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1065 max words, 200 samples - at ../dataset/gen-word-1065-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1795 max words - at ../dataset/shuffle-word-1795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 905 max words, 200 samples - at ../dataset/gen-word-905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 59 samples (20 token repeat) - 1125 max words - at ../dataset/shuffle-word-1125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 765 max words, 200 samples - at ../dataset/gen-word-765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1195 max words, 200 samples - at ../dataset/gen-word-1195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1225 max words, 200 samples - at ../dataset/gen-word-1225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1475 max words, 200 samples - at ../dataset/gen-word-1475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 915 max words, 200 samples - at ../dataset/gen-word-915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1155 max words - at ../dataset/shuffle-word-1155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1735 max words - at ../dataset/shuffle-word-1735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 805 max words, 200 samples - at ../dataset/gen-word-805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 785 max words, 200 samples - at ../dataset/gen-word-785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 880 max words, 200 samples - at ../dataset/gen-word-880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 850 max words, 200 samples - at ../dataset/gen-word-850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 65 samples (20 token repeat) - 885 max words - at ../dataset/shuffle-word-885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 115 samples (20 token repeat) - 455 max words - at ../dataset/shuffle-word-455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 930 max words, 200 samples - at ../dataset/gen-word-930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1205 max words, 200 samples - at ../dataset/gen-word-1205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1055 max words - at ../dataset/shuffle-word-1055-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1870 max words - at ../dataset/shuffle-word-1870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1135 max words, 200 samples - at ../dataset/gen-word-1135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1145 max words, 200 samples - at ../dataset/gen-word-1145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1780 max words - at ../dataset/shuffle-word-1780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1410 max words, 200 samples - at ../dataset/gen-word-1410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 870 max words, 200 samples - at ../dataset/gen-word-870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 875 max words, 200 samples - at ../dataset/gen-word-875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1910 max words, 200 samples - at ../dataset/gen-word-1910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1635 max words, 200 samples - at ../dataset/gen-word-1635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1030 max words, 200 samples - at ../dataset/gen-word-1030-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 50 samples (20 token repeat) - 1245 max words - at ../dataset/shuffle-word-1245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1330 max words, 200 samples - at ../dataset/gen-word-1330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 775 max words, 200 samples - at ../dataset/gen-word-775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1695 max words, 200 samples - at ../dataset/gen-word-1695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1700 max words, 200 samples - at ../dataset/gen-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 965 max words, 200 samples - at ../dataset/gen-word-965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1850 max words, 200 samples - at ../dataset/gen-word-1850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 835 max words, 200 samples - at ../dataset/gen-word-835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 81 samples (20 token repeat) - 675 max words - at ../dataset/shuffle-word-675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1165 max words - at ../dataset/shuffle-word-1165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1600 max words - at ../dataset/shuffle-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1630 max words - at ../dataset/shuffle-word-1630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1750 max words, 200 samples - at ../dataset/gen-word-1750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1590 max words - at ../dataset/shuffle-word-1590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1585 max words - at ../dataset/shuffle-word-1585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1105 max words, 200 samples - at ../dataset/gen-word-1105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1595 max words - at ../dataset/shuffle-word-1595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 59 samples (20 token repeat) - 1195 max words - at ../dataset/shuffle-word-1195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1730 max words - at ../dataset/shuffle-word-1730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1390 max words - at ../dataset/shuffle-word-1390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1710 max words, 200 samples - at ../dataset/gen-word-1710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1645 max words, 200 samples - at ../dataset/gen-word-1645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 41 samples (20 token repeat) - 1350 max words - at ../dataset/shuffle-word-1350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1505 max words - at ../dataset/shuffle-word-1505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1580 max words, 200 samples - at ../dataset/gen-word-1580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1720 max words, 200 samples - at ../dataset/gen-word-1720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 840 max words, 200 samples - at ../dataset/gen-word-840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1070 max words - at ../dataset/shuffle-word-1070-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 59 samples (20 token repeat) - 1185 max words - at ../dataset/shuffle-word-1185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 59 samples (20 token repeat) - 1130 max words - at ../dataset/shuffle-word-1130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1885 max words - at ../dataset/shuffle-word-1885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1335 max words - at ../dataset/shuffle-word-1335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 63 samples (20 token repeat) - 870 max words - at ../dataset/shuffle-word-870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1460 max words - at ../dataset/shuffle-word-1460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1575 max words - at ../dataset/shuffle-word-1575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1925 max words - at ../dataset/shuffle-word-1925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 48 samples (20 token repeat) - 1290 max words - at ../dataset/shuffle-word-1290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 49 samples (20 token repeat) - 1240 max words - at ../dataset/shuffle-word-1240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1120 max words, 200 samples - at ../dataset/gen-word-1120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 59 samples (20 token repeat) - 1145 max words - at ../dataset/shuffle-word-1145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1705 max words, 200 samples - at ../dataset/gen-word-1705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1520 max words - at ../dataset/shuffle-word-1520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1115 max words - at ../dataset/shuffle-word-1115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 945 max words, 200 samples - at ../dataset/gen-word-945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1530 max words, 200 samples - at ../dataset/gen-word-1530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 62 samples (20 token repeat) - 880 max words - at ../dataset/shuffle-word-880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1720 max words - at ../dataset/shuffle-word-1720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1805 max words - at ../dataset/shuffle-word-1805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 910 max words - at ../dataset/shuffle-word-910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1670 max words, 200 samples - at ../dataset/gen-word-1670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 940 max words, 200 samples - at ../dataset/gen-word-940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1955 max words - at ../dataset/shuffle-word-1955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 955 max words, 200 samples - at ../dataset/gen-word-955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1825 max words, 200 samples - at ../dataset/gen-word-1825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1645 max words - at ../dataset/shuffle-word-1645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1615 max words, 200 samples - at ../dataset/gen-word-1615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 685 max words, 200 samples - at ../dataset/gen-word-685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1605 max words, 200 samples - at ../dataset/gen-word-1605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1965 max words, 200 samples - at ../dataset/gen-word-1965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1485 max words - at ../dataset/shuffle-word-1485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1505 max words, 200 samples - at ../dataset/gen-word-1505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1510 max words, 200 samples - at ../dataset/gen-word-1510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1335 max words, 200 samples - at ../dataset/gen-word-1335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 990 max words, 200 samples - at ../dataset/gen-word-990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 960 max words, 200 samples - at ../dataset/gen-word-960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1990 max words - at ../dataset/shuffle-word-1990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1685 max words, 200 samples - at ../dataset/gen-word-1685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1095 max words - at ../dataset/shuffle-word-1095-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1135 max words - at ../dataset/shuffle-word-1135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1875 max words - at ../dataset/shuffle-word-1875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1920 max words - at ../dataset/shuffle-word-1920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 950 max words, 200 samples - at ../dataset/gen-word-950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1975 max words - at ../dataset/shuffle-word-1975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1625 max words, 200 samples - at ../dataset/gen-word-1625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 64 samples (20 token repeat) - 875 max words - at ../dataset/shuffle-word-875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1355 max words - at ../dataset/shuffle-word-1355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1200 max words, 200 samples - at ../dataset/gen-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1315 max words, 200 samples - at ../dataset/gen-word-1315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1495 max words - at ../dataset/shuffle-word-1495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 61 samples (20 token repeat) - 895 max words - at ../dataset/shuffle-word-895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 46 samples (20 token repeat) - 1235 max words - at ../dataset/shuffle-word-1235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1980 max words - at ../dataset/shuffle-word-1980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1420 max words - at ../dataset/shuffle-word-1420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 990 max words - at ../dataset/shuffle-word-990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 49 samples (20 token repeat) - 1300 max words - at ../dataset/shuffle-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1935 max words - at ../dataset/shuffle-word-1935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1215 max words, 200 samples - at ../dataset/gen-word-1215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1825 max words - at ../dataset/shuffle-word-1825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1840 max words, 200 samples - at ../dataset/gen-word-1840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1000 max words - at ../dataset/shuffle-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1995 max words, 200 samples - at ../dataset/gen-word-1995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 42 samples (20 token repeat) - 1365 max words - at ../dataset/shuffle-word-1365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1350 max words, 200 samples - at ../dataset/gen-word-1350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 945 max words - at ../dataset/shuffle-word-945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1040 max words, 200 samples - at ../dataset/gen-word-1040-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1325 max words, 200 samples - at ../dataset/gen-word-1325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1900 max words - at ../dataset/shuffle-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1755 max words - at ../dataset/shuffle-word-1755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1270 max words, 200 samples - at ../dataset/gen-word-1270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1495 max words, 200 samples - at ../dataset/gen-word-1495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1860 max words - at ../dataset/shuffle-word-1860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1740 max words, 200 samples - at ../dataset/gen-word-1740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 59 samples (20 token repeat) - 1180 max words - at ../dataset/shuffle-word-1180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 995 max words - at ../dataset/shuffle-word-995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1640 max words, 200 samples - at ../dataset/gen-word-1640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1690 max words - at ../dataset/shuffle-word-1690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1770 max words - at ../dataset/shuffle-word-1770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1100 max words - at ../dataset/shuffle-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 695 max words - at ../dataset/shuffle-word-695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1435 max words, 200 samples - at ../dataset/gen-word-1435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1015 max words, 200 samples - at ../dataset/gen-word-1015-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1930 max words, 200 samples - at ../dataset/gen-word-1930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 625 max words - at ../dataset/shuffle-word-625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1440 max words, 200 samples - at ../dataset/gen-word-1440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1730 max words, 200 samples - at ../dataset/gen-word-1730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1690 max words, 200 samples - at ../dataset/gen-word-1690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1455 max words, 200 samples - at ../dataset/gen-word-1455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 200 samples (20 token repeat) - 225 max words - at ../dataset/shuffle-word-225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1090 max words, 200 samples - at ../dataset/gen-word-1090-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 695 max words, 200 samples - at ../dataset/gen-word-695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 975 max words, 200 samples - at ../dataset/gen-word-975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1835 max words - at ../dataset/shuffle-word-1835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1755 max words, 200 samples - at ../dataset/gen-word-1755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1370 max words, 200 samples - at ../dataset/gen-word-1370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1810 max words - at ../dataset/shuffle-word-1810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1595 max words, 200 samples - at ../dataset/gen-word-1595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1550 max words - at ../dataset/shuffle-word-1550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 935 max words - at ../dataset/shuffle-word-935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1110 max words, 200 samples - at ../dataset/gen-word-1110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1190 max words, 200 samples - at ../dataset/gen-word-1190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1220 max words, 200 samples - at ../dataset/gen-word-1220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1210 max words, 200 samples - at ../dataset/gen-word-1210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1870 max words, 200 samples - at ../dataset/gen-word-1870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 59 samples (20 token repeat) - 1170 max words - at ../dataset/shuffle-word-1170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1560 max words - at ../dataset/shuffle-word-1560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1545 max words - at ../dataset/shuffle-word-1545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1590 max words, 200 samples - at ../dataset/gen-word-1590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 955 max words - at ../dataset/shuffle-word-955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1600 max words, 200 samples - at ../dataset/gen-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1790 max words - at ../dataset/shuffle-word-1790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1745 max words - at ../dataset/shuffle-word-1745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1050 max words, 200 samples - at ../dataset/gen-word-1050-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1890 max words - at ../dataset/shuffle-word-1890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1750 max words - at ../dataset/shuffle-word-1750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1405 max words, 200 samples - at ../dataset/gen-word-1405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1250 max words, 200 samples - at ../dataset/gen-word-1250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1465 max words, 200 samples - at ../dataset/gen-word-1465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1610 max words, 200 samples - at ../dataset/gen-word-1610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1760 max words, 200 samples - at ../dataset/gen-word-1760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1970 max words - at ../dataset/shuffle-word-1970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1480 max words - at ../dataset/shuffle-word-1480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1165 max words, 200 samples - at ../dataset/gen-word-1165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1340 max words, 200 samples - at ../dataset/gen-word-1340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1045 max words, 200 samples - at ../dataset/gen-word-1045-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1725 max words, 200 samples - at ../dataset/gen-word-1725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1940 max words, 200 samples - at ../dataset/gen-word-1940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1070 max words, 200 samples - at ../dataset/gen-word-1070-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 660 max words, 200 samples - at ../dataset/gen-word-660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1675 max words, 200 samples - at ../dataset/gen-word-1675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1360 max words, 200 samples - at ../dataset/gen-word-1360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1850 max words - at ../dataset/shuffle-word-1850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1925 max words, 200 samples - at ../dataset/gen-word-1925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 980 max words, 200 samples - at ../dataset/gen-word-980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1770 max words, 200 samples - at ../dataset/gen-word-1770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1895 max words - at ../dataset/shuffle-word-1895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 42 samples (20 token repeat) - 1345 max words - at ../dataset/shuffle-word-1345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1780 max words, 200 samples - at ../dataset/gen-word-1780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1665 max words - at ../dataset/shuffle-word-1665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1845 max words - at ../dataset/shuffle-word-1845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1655 max words, 200 samples - at ../dataset/gen-word-1655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1355 max words, 200 samples - at ../dataset/gen-word-1355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 65 samples (20 token repeat) - 900 max words - at ../dataset/shuffle-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1295 max words, 200 samples - at ../dataset/gen-word-1295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1865 max words - at ../dataset/shuffle-word-1865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1365 max words, 200 samples - at ../dataset/gen-word-1365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1445 max words, 200 samples - at ../dataset/gen-word-1445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1320 max words, 200 samples - at ../dataset/gen-word-1320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 985 max words, 200 samples - at ../dataset/gen-word-985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1670 max words - at ../dataset/shuffle-word-1670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1480 max words, 200 samples - at ../dataset/gen-word-1480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1570 max words - at ../dataset/shuffle-word-1570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1760 max words - at ../dataset/shuffle-word-1760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1230 max words, 200 samples - at ../dataset/gen-word-1230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1710 max words - at ../dataset/shuffle-word-1710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1035 max words, 200 samples - at ../dataset/gen-word-1035-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1380 max words, 200 samples - at ../dataset/gen-word-1380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1400 max words, 200 samples - at ../dataset/gen-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1675 max words - at ../dataset/shuffle-word-1675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1765 max words, 200 samples - at ../dataset/gen-word-1765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1290 max words, 200 samples - at ../dataset/gen-word-1290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1095 max words, 200 samples - at ../dataset/gen-word-1095-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1910 max words - at ../dataset/shuffle-word-1910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1280 max words, 200 samples - at ../dataset/gen-word-1280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1570 max words, 200 samples - at ../dataset/gen-word-1570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1745 max words, 200 samples - at ../dataset/gen-word-1745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 655 max words, 200 samples - at ../dataset/gen-word-655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1965 max words - at ../dataset/shuffle-word-1965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1785 max words - at ../dataset/shuffle-word-1785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1550 max words, 200 samples - at ../dataset/gen-word-1550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 715 max words, 200 samples - at ../dataset/gen-word-715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1375 max words, 200 samples - at ../dataset/gen-word-1375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1985 max words, 200 samples - at ../dataset/gen-word-1985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1170 max words, 200 samples - at ../dataset/gen-word-1170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1245 max words, 200 samples - at ../dataset/gen-word-1245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1395 max words, 200 samples - at ../dataset/gen-word-1395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1855 max words, 200 samples - at ../dataset/gen-word-1855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1080 max words - at ../dataset/shuffle-word-1080-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1725 max words - at ../dataset/shuffle-word-1725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1150 max words, 200 samples - at ../dataset/gen-word-1150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1960 max words - at ../dataset/shuffle-word-1960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1705 max words - at ../dataset/shuffle-word-1705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1945 max words - at ../dataset/shuffle-word-1945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1775 max words - at ../dataset/shuffle-word-1775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 770 max words, 200 samples - at ../dataset/gen-word-770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2000 max words, 200 samples - at ../dataset/gen-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1460 max words, 200 samples - at ../dataset/gen-word-1460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1715 max words, 200 samples - at ../dataset/gen-word-1715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1630 max words, 200 samples - at ../dataset/gen-word-1630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1790 max words, 200 samples - at ../dataset/gen-word-1790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1525 max words, 200 samples - at ../dataset/gen-word-1525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 41 samples (20 token repeat) - 1475 max words - at ../dataset/shuffle-word-1475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1565 max words, 200 samples - at ../dataset/gen-word-1565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1425 max words - at ../dataset/shuffle-word-1425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 995 max words, 200 samples - at ../dataset/gen-word-995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1395 max words - at ../dataset/shuffle-word-1395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1905 max words, 200 samples - at ../dataset/gen-word-1905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1830 max words, 200 samples - at ../dataset/gen-word-1830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1300 max words, 200 samples - at ../dataset/gen-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 760 max words, 200 samples - at ../dataset/gen-word-760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1915 max words, 200 samples - at ../dataset/gen-word-1915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1975 max words, 200 samples - at ../dataset/gen-word-1975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1470 max words, 200 samples - at ../dataset/gen-word-1470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 890 max words, 200 samples - at ../dataset/gen-word-890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1150 max words - at ../dataset/shuffle-word-1150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1865 max words, 200 samples - at ../dataset/gen-word-1865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1275 max words, 200 samples - at ../dataset/gen-word-1275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1185 max words, 200 samples - at ../dataset/gen-word-1185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1535 max words, 200 samples - at ../dataset/gen-word-1535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1125 max words, 200 samples - at ../dataset/gen-word-1125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1680 max words, 200 samples - at ../dataset/gen-word-1680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1310 max words, 200 samples - at ../dataset/gen-word-1310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1490 max words, 200 samples - at ../dataset/gen-word-1490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1665 max words, 200 samples - at ../dataset/gen-word-1665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1430 max words, 200 samples - at ../dataset/gen-word-1430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1800 max words, 200 samples - at ../dataset/gen-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1385 max words, 200 samples - at ../dataset/gen-word-1385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1815 max words, 200 samples - at ../dataset/gen-word-1815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1860 max words, 200 samples - at ../dataset/gen-word-1860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1155 max words, 200 samples - at ../dataset/gen-word-1155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1935 max words, 200 samples - at ../dataset/gen-word-1935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1835 max words, 200 samples - at ../dataset/gen-word-1835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1960 max words, 200 samples - at ../dataset/gen-word-1960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1650 max words, 200 samples - at ../dataset/gen-word-1650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1485 max words, 200 samples - at ../dataset/gen-word-1485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1080 max words, 200 samples - at ../dataset/gen-word-1080-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1545 max words, 200 samples - at ../dataset/gen-word-1545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1255 max words, 200 samples - at ../dataset/gen-word-1255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1735 max words, 200 samples - at ../dataset/gen-word-1735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1160 max words, 200 samples - at ../dataset/gen-word-1160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1260 max words, 200 samples - at ../dataset/gen-word-1260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1620 max words, 200 samples - at ../dataset/gen-word-1620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1990 max words, 200 samples - at ../dataset/gen-word-1990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1875 max words, 200 samples - at ../dataset/gen-word-1875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1885 max words, 200 samples - at ../dataset/gen-word-1885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1895 max words, 200 samples - at ../dataset/gen-word-1895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1520 max words, 200 samples - at ../dataset/gen-word-1520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1920 max words, 200 samples - at ../dataset/gen-word-1920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1805 max words, 200 samples - at ../dataset/gen-word-1805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1450 max words, 200 samples - at ../dataset/gen-word-1450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1660 max words, 200 samples - at ../dataset/gen-word-1660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1240 max words, 200 samples - at ../dataset/gen-word-1240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1235 max words, 200 samples - at ../dataset/gen-word-1235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1575 max words, 200 samples - at ../dataset/gen-word-1575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1980 max words, 200 samples - at ../dataset/gen-word-1980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1945 max words, 200 samples - at ../dataset/gen-word-1945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1775 max words, 200 samples - at ../dataset/gen-word-1775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1900 max words, 200 samples - at ../dataset/gen-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1585 max words, 200 samples - at ../dataset/gen-word-1585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1955 max words, 200 samples - at ../dataset/gen-word-1955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1880 max words, 200 samples - at ../dataset/gen-word-1880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1785 max words, 200 samples - at ../dataset/gen-word-1785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1890 max words, 200 samples - at ../dataset/gen-word-1890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1425 max words, 200 samples - at ../dataset/gen-word-1425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1820 max words, 200 samples - at ../dataset/gen-word-1820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1970 max words, 200 samples - at ../dataset/gen-word-1970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "## Done ##\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "total 965M\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "drwxr-xr-x  2 root root   36K Aug 24 02:34 .\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "drwxr-xr-x 11 root root   230 Aug 24 02:33 ..\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   20K Aug 24 02:34 gen-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  107K Aug 24 02:34 gen-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.0M Aug 24 02:34 gen-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.0M Aug 24 02:34 gen-word-1005-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.0M Aug 24 02:34 gen-word-1010-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.0M Aug 24 02:34 gen-word-1015-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.0M Aug 24 02:34 gen-word-1020-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.0M Aug 24 02:34 gen-word-1025-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.0M Aug 24 02:34 gen-word-1030-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.0M Aug 24 02:34 gen-word-1035-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.1M Aug 24 02:34 gen-word-1040-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.0M Aug 24 02:34 gen-word-1045-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  224K Aug 24 02:34 gen-word-105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.1M Aug 24 02:34 gen-word-1050-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.1M Aug 24 02:34 gen-word-1055-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.1M Aug 24 02:34 gen-word-1060-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.1M Aug 24 02:34 gen-word-1065-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.1M Aug 24 02:34 gen-word-1070-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.1M Aug 24 02:34 gen-word-1075-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.1M Aug 24 02:34 gen-word-1080-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.1M Aug 24 02:34 gen-word-1085-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.1M Aug 24 02:34 gen-word-1090-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.1M Aug 24 02:34 gen-word-1095-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  238K Aug 24 02:34 gen-word-110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.2M Aug 24 02:34 gen-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.2M Aug 24 02:34 gen-word-1105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.2M Aug 24 02:34 gen-word-1110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.2M Aug 24 02:34 gen-word-1115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.2M Aug 24 02:34 gen-word-1120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.2M Aug 24 02:34 gen-word-1125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.2M Aug 24 02:34 gen-word-1130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.2M Aug 24 02:34 gen-word-1135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.2M Aug 24 02:34 gen-word-1140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.2M Aug 24 02:34 gen-word-1145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  239K Aug 24 02:34 gen-word-115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.3M Aug 24 02:34 gen-word-1150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.2M Aug 24 02:34 gen-word-1155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.3M Aug 24 02:34 gen-word-1160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.3M Aug 24 02:34 gen-word-1165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.3M Aug 24 02:34 gen-word-1170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.3M Aug 24 02:34 gen-word-1175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.3M Aug 24 02:34 gen-word-1180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.3M Aug 24 02:34 gen-word-1185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.3M Aug 24 02:34 gen-word-1190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.3M Aug 24 02:34 gen-word-1195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  256K Aug 24 02:34 gen-word-120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.3M Aug 24 02:34 gen-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.4M Aug 24 02:34 gen-word-1205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.4M Aug 24 02:34 gen-word-1210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.4M Aug 24 02:34 gen-word-1215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.4M Aug 24 02:34 gen-word-1220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.4M Aug 24 02:34 gen-word-1225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.4M Aug 24 02:34 gen-word-1230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.4M Aug 24 02:34 gen-word-1235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.4M Aug 24 02:34 gen-word-1240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.4M Aug 24 02:34 gen-word-1245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  261K Aug 24 02:34 gen-word-125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.4M Aug 24 02:34 gen-word-1250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.5M Aug 24 02:34 gen-word-1255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.5M Aug 24 02:34 gen-word-1260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.5M Aug 24 02:34 gen-word-1265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.5M Aug 24 02:34 gen-word-1270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.5M Aug 24 02:34 gen-word-1275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.5M Aug 24 02:34 gen-word-1280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.5M Aug 24 02:34 gen-word-1285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.5M Aug 24 02:34 gen-word-1290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.5M Aug 24 02:34 gen-word-1295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  280K Aug 24 02:34 gen-word-130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.5M Aug 24 02:34 gen-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.6M Aug 24 02:34 gen-word-1305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.6M Aug 24 02:34 gen-word-1310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.6M Aug 24 02:34 gen-word-1315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.6M Aug 24 02:34 gen-word-1320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.6M Aug 24 02:34 gen-word-1325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.6M Aug 24 02:34 gen-word-1330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.6M Aug 24 02:34 gen-word-1335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.6M Aug 24 02:34 gen-word-1340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.6M Aug 24 02:34 gen-word-1345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  284K Aug 24 02:34 gen-word-135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.6M Aug 24 02:34 gen-word-1350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.6M Aug 24 02:34 gen-word-1355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.7M Aug 24 02:34 gen-word-1360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.7M Aug 24 02:34 gen-word-1365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.7M Aug 24 02:34 gen-word-1370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.7M Aug 24 02:34 gen-word-1375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.7M Aug 24 02:34 gen-word-1380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.7M Aug 24 02:34 gen-word-1385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.7M Aug 24 02:34 gen-word-1390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.7M Aug 24 02:34 gen-word-1395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  294K Aug 24 02:34 gen-word-140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.7M Aug 24 02:34 gen-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.7M Aug 24 02:34 gen-word-1405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.8M Aug 24 02:34 gen-word-1410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.8M Aug 24 02:34 gen-word-1415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.8M Aug 24 02:34 gen-word-1420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.8M Aug 24 02:34 gen-word-1425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.8M Aug 24 02:34 gen-word-1430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.8M Aug 24 02:34 gen-word-1435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.8M Aug 24 02:34 gen-word-1440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.8M Aug 24 02:34 gen-word-1445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  306K Aug 24 02:34 gen-word-145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.8M Aug 24 02:34 gen-word-1450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.8M Aug 24 02:34 gen-word-1455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.8M Aug 24 02:34 gen-word-1460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.8M Aug 24 02:34 gen-word-1465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.9M Aug 24 02:34 gen-word-1470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.9M Aug 24 02:34 gen-word-1475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.9M Aug 24 02:34 gen-word-1480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.9M Aug 24 02:34 gen-word-1485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.9M Aug 24 02:34 gen-word-1490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.9M Aug 24 02:34 gen-word-1495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   24K Aug 24 02:34 gen-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  306K Aug 24 02:34 gen-word-150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.9M Aug 24 02:34 gen-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.9M Aug 24 02:34 gen-word-1505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.9M Aug 24 02:34 gen-word-1510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.9M Aug 24 02:34 gen-word-1515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.0M Aug 24 02:34 gen-word-1520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.0M Aug 24 02:34 gen-word-1525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.0M Aug 24 02:34 gen-word-1530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.0M Aug 24 02:34 gen-word-1535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.0M Aug 24 02:34 gen-word-1540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.0M Aug 24 02:34 gen-word-1545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  324K Aug 24 02:34 gen-word-155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.0M Aug 24 02:34 gen-word-1550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.0M Aug 24 02:34 gen-word-1555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.0M Aug 24 02:34 gen-word-1560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.0M Aug 24 02:34 gen-word-1565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.1M Aug 24 02:34 gen-word-1570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.0M Aug 24 02:34 gen-word-1575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.1M Aug 24 02:34 gen-word-1580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.1M Aug 24 02:34 gen-word-1585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.1M Aug 24 02:34 gen-word-1590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.1M Aug 24 02:34 gen-word-1595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  331K Aug 24 02:34 gen-word-160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.1M Aug 24 02:34 gen-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.1M Aug 24 02:34 gen-word-1605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.1M Aug 24 02:34 gen-word-1610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.1M Aug 24 02:34 gen-word-1615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.2M Aug 24 02:34 gen-word-1620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.1M Aug 24 02:34 gen-word-1625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.2M Aug 24 02:34 gen-word-1630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.2M Aug 24 02:34 gen-word-1635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.2M Aug 24 02:34 gen-word-1640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.2M Aug 24 02:34 gen-word-1645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  339K Aug 24 02:34 gen-word-165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.2M Aug 24 02:34 gen-word-1650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.2M Aug 24 02:34 gen-word-1655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.2M Aug 24 02:34 gen-word-1660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.2M Aug 24 02:34 gen-word-1665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.2M Aug 24 02:34 gen-word-1670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.3M Aug 24 02:34 gen-word-1675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.3M Aug 24 02:34 gen-word-1680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.3M Aug 24 02:34 gen-word-1685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.3M Aug 24 02:34 gen-word-1690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.3M Aug 24 02:34 gen-word-1695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  355K Aug 24 02:34 gen-word-170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.3M Aug 24 02:34 gen-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.3M Aug 24 02:34 gen-word-1705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.3M Aug 24 02:34 gen-word-1710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.3M Aug 24 02:34 gen-word-1715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.3M Aug 24 02:34 gen-word-1720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.3M Aug 24 02:34 gen-word-1725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.4M Aug 24 02:34 gen-word-1730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.4M Aug 24 02:34 gen-word-1735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.4M Aug 24 02:34 gen-word-1740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.4M Aug 24 02:34 gen-word-1745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  362K Aug 24 02:34 gen-word-175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.4M Aug 24 02:34 gen-word-1750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.4M Aug 24 02:34 gen-word-1755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.4M Aug 24 02:34 gen-word-1760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.4M Aug 24 02:34 gen-word-1765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.4M Aug 24 02:34 gen-word-1770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.4M Aug 24 02:34 gen-word-1775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.4M Aug 24 02:34 gen-word-1780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.5M Aug 24 02:34 gen-word-1785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.5M Aug 24 02:34 gen-word-1790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.5M Aug 24 02:34 gen-word-1795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  371K Aug 24 02:34 gen-word-180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.5M Aug 24 02:34 gen-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.5M Aug 24 02:34 gen-word-1805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.5M Aug 24 02:34 gen-word-1810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.5M Aug 24 02:34 gen-word-1815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.5M Aug 24 02:34 gen-word-1820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.5M Aug 24 02:34 gen-word-1825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.5M Aug 24 02:34 gen-word-1830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.6M Aug 24 02:34 gen-word-1835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.5M Aug 24 02:34 gen-word-1840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.6M Aug 24 02:34 gen-word-1845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  377K Aug 24 02:34 gen-word-185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.6M Aug 24 02:34 gen-word-1850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.6M Aug 24 02:34 gen-word-1855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.6M Aug 24 02:34 gen-word-1860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.6M Aug 24 02:34 gen-word-1865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.6M Aug 24 02:34 gen-word-1870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.6M Aug 24 02:34 gen-word-1875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.6M Aug 24 02:34 gen-word-1880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.7M Aug 24 02:34 gen-word-1885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.6M Aug 24 02:34 gen-word-1890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.7M Aug 24 02:34 gen-word-1895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  393K Aug 24 02:34 gen-word-190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.7M Aug 24 02:34 gen-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.7M Aug 24 02:34 gen-word-1905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.7M Aug 24 02:34 gen-word-1910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.7M Aug 24 02:34 gen-word-1915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.7M Aug 24 02:34 gen-word-1920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.7M Aug 24 02:34 gen-word-1925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.7M Aug 24 02:34 gen-word-1930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.7M Aug 24 02:34 gen-word-1935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:34 gen-word-1940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:34 gen-word-1945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  396K Aug 24 02:34 gen-word-195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:34 gen-word-1950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:34 gen-word-1955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:34 gen-word-1960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:34 gen-word-1965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:34 gen-word-1970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:34 gen-word-1975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:34 gen-word-1980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:34 gen-word-1985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:34 gen-word-1990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.9M Aug 24 02:34 gen-word-1995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   31K Aug 24 02:34 gen-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  410K Aug 24 02:34 gen-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.9M Aug 24 02:34 gen-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  423K Aug 24 02:34 gen-word-205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  428K Aug 24 02:34 gen-word-210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  436K Aug 24 02:34 gen-word-215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  454K Aug 24 02:34 gen-word-220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  460K Aug 24 02:34 gen-word-225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  466K Aug 24 02:34 gen-word-230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  472K Aug 24 02:34 gen-word-235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  488K Aug 24 02:34 gen-word-240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  498K Aug 24 02:34 gen-word-245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   35K Aug 24 02:34 gen-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  502K Aug 24 02:34 gen-word-250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 gen-word-255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  530K Aug 24 02:34 gen-word-260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  534K Aug 24 02:34 gen-word-265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  548K Aug 24 02:34 gen-word-270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  554K Aug 24 02:34 gen-word-275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  560K Aug 24 02:34 gen-word-280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  570K Aug 24 02:34 gen-word-285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  575K Aug 24 02:34 gen-word-290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  588K Aug 24 02:34 gen-word-295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   40K Aug 24 02:34 gen-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  606K Aug 24 02:34 gen-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  612K Aug 24 02:34 gen-word-305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  631K Aug 24 02:34 gen-word-310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  637K Aug 24 02:34 gen-word-315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  645K Aug 24 02:34 gen-word-320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  651K Aug 24 02:34 gen-word-325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  663K Aug 24 02:34 gen-word-330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  677K Aug 24 02:34 gen-word-335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  682K Aug 24 02:34 gen-word-340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  686K Aug 24 02:34 gen-word-345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   44K Aug 24 02:34 gen-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  709K Aug 24 02:34 gen-word-350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  708K Aug 24 02:34 gen-word-355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  726K Aug 24 02:34 gen-word-360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  728K Aug 24 02:34 gen-word-365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  744K Aug 24 02:34 gen-word-370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  747K Aug 24 02:34 gen-word-375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  767K Aug 24 02:34 gen-word-380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  759K Aug 24 02:34 gen-word-385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  771K Aug 24 02:34 gen-word-390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  795K Aug 24 02:34 gen-word-395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   49K Aug 24 02:34 gen-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  797K Aug 24 02:34 gen-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  805K Aug 24 02:34 gen-word-405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  825K Aug 24 02:34 gen-word-410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  822K Aug 24 02:34 gen-word-415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  834K Aug 24 02:34 gen-word-420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  847K Aug 24 02:34 gen-word-425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  854K Aug 24 02:34 gen-word-430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  866K Aug 24 02:34 gen-word-435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  869K Aug 24 02:34 gen-word-440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  883K Aug 24 02:34 gen-word-445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   54K Aug 24 02:34 gen-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  906K Aug 24 02:34 gen-word-450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  906K Aug 24 02:34 gen-word-455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  910K Aug 24 02:34 gen-word-460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  919K Aug 24 02:34 gen-word-465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  938K Aug 24 02:34 gen-word-470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  943K Aug 24 02:34 gen-word-475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  951K Aug 24 02:34 gen-word-480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  952K Aug 24 02:34 gen-word-485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  968K Aug 24 02:34 gen-word-490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  982K Aug 24 02:34 gen-word-495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   15K Aug 24 02:34 gen-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   58K Aug 24 02:34 gen-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  989K Aug 24 02:34 gen-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 1006K Aug 24 02:34 gen-word-505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 1020K Aug 24 02:34 gen-word-510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 1020K Aug 24 02:34 gen-word-515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.1M Aug 24 02:34 gen-word-520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.1M Aug 24 02:34 gen-word-525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.1M Aug 24 02:34 gen-word-530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.1M Aug 24 02:34 gen-word-535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.1M Aug 24 02:34 gen-word-540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.1M Aug 24 02:34 gen-word-545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   63K Aug 24 02:34 gen-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.1M Aug 24 02:34 gen-word-550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.1M Aug 24 02:34 gen-word-555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.1M Aug 24 02:34 gen-word-560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.1M Aug 24 02:34 gen-word-565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.1M Aug 24 02:34 gen-word-570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.2M Aug 24 02:34 gen-word-575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.2M Aug 24 02:34 gen-word-580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.2M Aug 24 02:34 gen-word-585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.2M Aug 24 02:34 gen-word-590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.2M Aug 24 02:34 gen-word-595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   68K Aug 24 02:34 gen-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.2M Aug 24 02:34 gen-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.2M Aug 24 02:34 gen-word-605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.2M Aug 24 02:34 gen-word-610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.2M Aug 24 02:34 gen-word-615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.2M Aug 24 02:34 gen-word-620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.3M Aug 24 02:34 gen-word-625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.3M Aug 24 02:34 gen-word-630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.3M Aug 24 02:34 gen-word-635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.3M Aug 24 02:34 gen-word-640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.3M Aug 24 02:34 gen-word-645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   71K Aug 24 02:34 gen-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.3M Aug 24 02:34 gen-word-650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.3M Aug 24 02:34 gen-word-655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.3M Aug 24 02:34 gen-word-660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.3M Aug 24 02:34 gen-word-665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.3M Aug 24 02:34 gen-word-670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.4M Aug 24 02:34 gen-word-675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.4M Aug 24 02:34 gen-word-680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.4M Aug 24 02:34 gen-word-685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.4M Aug 24 02:34 gen-word-690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.4M Aug 24 02:34 gen-word-695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   79K Aug 24 02:34 gen-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.4M Aug 24 02:34 gen-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.4M Aug 24 02:34 gen-word-705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.4M Aug 24 02:34 gen-word-710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.4M Aug 24 02:34 gen-word-715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.4M Aug 24 02:34 gen-word-720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.4M Aug 24 02:34 gen-word-725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.5M Aug 24 02:34 gen-word-730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.5M Aug 24 02:34 gen-word-735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.5M Aug 24 02:34 gen-word-740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.5M Aug 24 02:34 gen-word-745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   83K Aug 24 02:34 gen-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.5M Aug 24 02:34 gen-word-750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.5M Aug 24 02:34 gen-word-755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.5M Aug 24 02:34 gen-word-760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.5M Aug 24 02:34 gen-word-765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.5M Aug 24 02:34 gen-word-770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.5M Aug 24 02:34 gen-word-775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.5M Aug 24 02:34 gen-word-780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.6M Aug 24 02:34 gen-word-785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.6M Aug 24 02:34 gen-word-790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.6M Aug 24 02:34 gen-word-795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   87K Aug 24 02:34 gen-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.6M Aug 24 02:34 gen-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.6M Aug 24 02:34 gen-word-805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.6M Aug 24 02:34 gen-word-810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.6M Aug 24 02:34 gen-word-815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.6M Aug 24 02:34 gen-word-820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.6M Aug 24 02:34 gen-word-825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.6M Aug 24 02:34 gen-word-830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.6M Aug 24 02:34 gen-word-835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.7M Aug 24 02:34 gen-word-840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.7M Aug 24 02:34 gen-word-845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   89K Aug 24 02:34 gen-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.7M Aug 24 02:34 gen-word-850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.7M Aug 24 02:34 gen-word-855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.7M Aug 24 02:34 gen-word-860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.7M Aug 24 02:34 gen-word-865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.7M Aug 24 02:34 gen-word-870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.7M Aug 24 02:34 gen-word-875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.7M Aug 24 02:34 gen-word-880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.8M Aug 24 02:34 gen-word-885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.8M Aug 24 02:34 gen-word-890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.8M Aug 24 02:34 gen-word-895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   98K Aug 24 02:34 gen-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.8M Aug 24 02:34 gen-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.8M Aug 24 02:34 gen-word-905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.8M Aug 24 02:34 gen-word-910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.8M Aug 24 02:34 gen-word-915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.8M Aug 24 02:34 gen-word-920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.8M Aug 24 02:34 gen-word-925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.8M Aug 24 02:34 gen-word-930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.8M Aug 24 02:34 gen-word-935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.9M Aug 24 02:34 gen-word-940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.9M Aug 24 02:34 gen-word-945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  102K Aug 24 02:34 gen-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.9M Aug 24 02:34 gen-word-950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.9M Aug 24 02:34 gen-word-955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.9M Aug 24 02:34 gen-word-960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.9M Aug 24 02:34 gen-word-965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.9M Aug 24 02:34 gen-word-970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.9M Aug 24 02:34 gen-word-975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.9M Aug 24 02:34 gen-word-980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.9M Aug 24 02:34 gen-word-985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.0M Aug 24 02:34 gen-word-990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.9M Aug 24 02:34 gen-word-995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   55K Aug 24 02:34 shuffle-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:34 shuffle-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:34 shuffle-word-1005-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1010-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1015-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1020-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:34 shuffle-word-1025-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-1030-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1035-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1040-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1045-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  550K Aug 24 02:34 shuffle-word-105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-1050-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:34 shuffle-word-1055-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:34 shuffle-word-1060-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1065-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1070-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1075-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1080-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1085-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1090-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:34 shuffle-word-1095-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  552K Aug 24 02:34 shuffle-word-110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:34 shuffle-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:34 shuffle-word-1110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:34 shuffle-word-1120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-1135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:34 shuffle-word-1140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-1145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  554K Aug 24 02:34 shuffle-word-115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-1155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-1160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-1165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:34 shuffle-word-1170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-1175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-1185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  544K Aug 24 02:34 shuffle-word-120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  515K Aug 24 02:34 shuffle-word-1205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  517K Aug 24 02:34 shuffle-word-1235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  550K Aug 24 02:34 shuffle-word-125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-1250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-1255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-1270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  547K Aug 24 02:34 shuffle-word-130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:34 shuffle-word-1325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-1330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  517K Aug 24 02:34 shuffle-word-1340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  547K Aug 24 02:34 shuffle-word-135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-1355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  547K Aug 24 02:34 shuffle-word-140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  517K Aug 24 02:34 shuffle-word-1410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-1440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  547K Aug 24 02:34 shuffle-word-145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-1450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-1455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-1470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-1485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:34 shuffle-word-1490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-1495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   43K Aug 24 02:34 shuffle-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  539K Aug 24 02:34 shuffle-word-150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-1505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-1510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  517K Aug 24 02:34 shuffle-word-1530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:34 shuffle-word-1545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  543K Aug 24 02:34 shuffle-word-155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:34 shuffle-word-1555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-1565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-1570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:34 shuffle-word-1575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-1590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  541K Aug 24 02:34 shuffle-word-160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  517K Aug 24 02:34 shuffle-word-1620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-1635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  516K Aug 24 02:34 shuffle-word-1645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  548K Aug 24 02:34 shuffle-word-165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:34 shuffle-word-1660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:34 shuffle-word-1690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  541K Aug 24 02:34 shuffle-word-170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:34 shuffle-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-1715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-1735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  543K Aug 24 02:34 shuffle-word-175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:34 shuffle-word-1750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-1755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-1790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  543K Aug 24 02:34 shuffle-word-180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-1810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-1815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-1825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-1835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-1840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-1845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  540K Aug 24 02:34 shuffle-word-185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-1885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  542K Aug 24 02:34 shuffle-word-190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-1910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-1915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-1925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:34 shuffle-word-1935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-1945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  542K Aug 24 02:34 shuffle-word-195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:34 shuffle-word-1950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:34 shuffle-word-1955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:34 shuffle-word-1965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:34 shuffle-word-1970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-1975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-1980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-1990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-1995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   37K Aug 24 02:34 shuffle-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  542K Aug 24 02:34 shuffle-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  538K Aug 24 02:34 shuffle-word-205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  535K Aug 24 02:34 shuffle-word-210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  536K Aug 24 02:34 shuffle-word-215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  531K Aug 24 02:34 shuffle-word-220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  538K Aug 24 02:34 shuffle-word-225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  536K Aug 24 02:34 shuffle-word-230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  534K Aug 24 02:34 shuffle-word-235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  534K Aug 24 02:34 shuffle-word-240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  533K Aug 24 02:34 shuffle-word-245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   36K Aug 24 02:34 shuffle-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  537K Aug 24 02:34 shuffle-word-250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  534K Aug 24 02:34 shuffle-word-255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  531K Aug 24 02:34 shuffle-word-260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  534K Aug 24 02:34 shuffle-word-265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  533K Aug 24 02:34 shuffle-word-270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  533K Aug 24 02:34 shuffle-word-275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  539K Aug 24 02:34 shuffle-word-280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  535K Aug 24 02:34 shuffle-word-285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  532K Aug 24 02:34 shuffle-word-290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  535K Aug 24 02:34 shuffle-word-295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   35K Aug 24 02:34 shuffle-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  531K Aug 24 02:34 shuffle-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  528K Aug 24 02:34 shuffle-word-305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  530K Aug 24 02:34 shuffle-word-310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  533K Aug 24 02:34 shuffle-word-315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  528K Aug 24 02:34 shuffle-word-320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  531K Aug 24 02:34 shuffle-word-325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:34 shuffle-word-330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  530K Aug 24 02:34 shuffle-word-340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  528K Aug 24 02:34 shuffle-word-345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   34K Aug 24 02:34 shuffle-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  528K Aug 24 02:34 shuffle-word-350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  530K Aug 24 02:34 shuffle-word-355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:34 shuffle-word-360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  535K Aug 24 02:34 shuffle-word-365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  531K Aug 24 02:34 shuffle-word-370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  530K Aug 24 02:34 shuffle-word-375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  530K Aug 24 02:34 shuffle-word-380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  530K Aug 24 02:34 shuffle-word-385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  532K Aug 24 02:34 shuffle-word-390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  529K Aug 24 02:34 shuffle-word-395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   31K Aug 24 02:34 shuffle-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  528K Aug 24 02:34 shuffle-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  530K Aug 24 02:34 shuffle-word-405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  530K Aug 24 02:34 shuffle-word-410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  529K Aug 24 02:34 shuffle-word-420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  531K Aug 24 02:34 shuffle-word-425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:34 shuffle-word-430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  532K Aug 24 02:34 shuffle-word-435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  531K Aug 24 02:34 shuffle-word-440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  532K Aug 24 02:34 shuffle-word-445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   30K Aug 24 02:34 shuffle-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:34 shuffle-word-455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  528K Aug 24 02:34 shuffle-word-460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:34 shuffle-word-465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  529K Aug 24 02:34 shuffle-word-470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:34 shuffle-word-475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  529K Aug 24 02:34 shuffle-word-480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  528K Aug 24 02:34 shuffle-word-485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:34 shuffle-word-490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:34 shuffle-word-495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   78K Aug 24 02:34 shuffle-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   32K Aug 24 02:34 shuffle-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:34 shuffle-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:34 shuffle-word-510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:34 shuffle-word-515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:34 shuffle-word-525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  528K Aug 24 02:34 shuffle-word-530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  528K Aug 24 02:34 shuffle-word-540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  530K Aug 24 02:34 shuffle-word-545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   29K Aug 24 02:34 shuffle-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  529K Aug 24 02:34 shuffle-word-550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  528K Aug 24 02:34 shuffle-word-555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  528K Aug 24 02:34 shuffle-word-565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  528K Aug 24 02:34 shuffle-word-575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  528K Aug 24 02:34 shuffle-word-580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  528K Aug 24 02:34 shuffle-word-590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:34 shuffle-word-595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   30K Aug 24 02:34 shuffle-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:34 shuffle-word-605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:34 shuffle-word-615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:34 shuffle-word-620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:34 shuffle-word-630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:34 shuffle-word-640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   29K Aug 24 02:34 shuffle-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:34 shuffle-word-650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:34 shuffle-word-665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:34 shuffle-word-675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:34 shuffle-word-680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:34 shuffle-word-685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   29K Aug 24 02:34 shuffle-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:34 shuffle-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:34 shuffle-word-715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:34 shuffle-word-720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:34 shuffle-word-725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  529K Aug 24 02:34 shuffle-word-740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:34 shuffle-word-745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:34 shuffle-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  528K Aug 24 02:34 shuffle-word-760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:34 shuffle-word-785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   30K Aug 24 02:34 shuffle-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:34 shuffle-word-810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   30K Aug 24 02:34 shuffle-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:34 shuffle-word-860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:34 shuffle-word-865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:34 shuffle-word-870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:34 shuffle-word-875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:34 shuffle-word-885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:34 shuffle-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:34 shuffle-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:34 shuffle-word-960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:34 shuffle-word-970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:34 shuffle-word-975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:34 shuffle-word-985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:34 shuffle-word-995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   12K Aug 24 02:34 word-2-count.jsonl\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%script bash\n",
+    "\n",
+    "########################################\n",
+    "# Generate the required jsonl dataset\n",
+    "########################################\n",
+    "\n",
+    "# Reset the dataset dir\n",
+    "mkdir -p ../dataset\n",
+    "rm -rf ../dataset/*.jsonl\n",
+    "\n",
+    "# Generate the various datasets\n",
+    "echo \"## Generating word reptition dataset ##\"\n",
+    "\n",
+    "#\n",
+    "# We reduce the training set for lower word count - and shift the focus upwards\n",
+    "#\n",
+    "python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-2-count.jsonl 2 100 &\n",
+    "for i in {5..100..5} \n",
+    "do\n",
+    "    python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 100 & \n",
+    "    python ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 1 & \n",
+    "done\n",
+    "\n",
+    "#\n",
+    "# Ramping up the 105 - 2000 words dataset (in steps of 5)\n",
+    "# \n",
+    "for i in {105..2000..5} \n",
+    "do\n",
+    "    python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 200 & \n",
+    "    python ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 20 & \n",
+    "done\n",
+    "\n",
+    "wait\n",
+    "echo \"## Done ##\"\n",
+    "\n",
+    "ls -alh ../dataset/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "aef69b97",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:34:54.117442Z",
+     "iopub.status.busy": "2023-08-24T02:34:54.117206Z",
+     "iopub.status.idle": "2023-08-24T02:35:10.030180Z",
+     "shell.execute_reply": "2023-08-24T02:35:10.029262Z"
+    },
+    "papermill": {
+     "duration": 16.02548,
+     "end_time": "2023-08-24T02:35:10.032586",
+     "exception": false,
+     "start_time": "2023-08-24T02:34:54.007106",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.11/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/v5base-mem-template.yaml', '--trainer.logger.init_args.name=v5-hs2x-L6-D4096-E0.1 - Mem-Tune ctx-2k (train-ctx=2k, deepspeed_stage_2_offload)', '--trainer.strategy=deepspeed_stage_2_offload', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-hs2x-L6-D4096-E0_1-mem-ctx-2k/', '--model.lr_init=3e-4', '--model.lr_final=1e-4', '--data.max_token_size=2048', '--model.ctx_len=2048', '--model.bptt_learning_range=1', '--model.load_model=../model/v5-hs2x-L6-D4096-E0_1-mem-ctx-1k.pth'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/v5base-mem-template.yaml', '--trainer.logger.init_args.name=v5-hs2x-L6-D4096-E0.1 - Mem-Tune ctx-2k (train-ctx=2k, deepspeed_stage_2_offload)', '--trainer.strategy=deepspeed_stage_2_offload', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-hs2x-L6-D4096-E0_1-mem-ctx-2k/', '--model.lr_init=3e-4', '--model.lr_final=1e-4', '--data.max_token_size=2048', '--model.ctx_len=2048', '--model.bptt_learning_range=1', '--model.load_model=../model/v5-hs2x-L6-D4096-E0_1-mem-ctx-1k.pth'].\r\n",
+      "  rank_zero_warn(\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.11/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 816267076\r\n",
+      "  rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n",
+      "Global seed set to 816267076\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: wandb version 0.15.8 is available!  To upgrade, please run:\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m:  $ pip install wandb --upgrade\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.4\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20230824_023500-p554d2sz\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mv5-hs2x-L6-D4096-E0.1 - Mem-Tune ctx-2k (train-ctx=2k, deepspeed_stage_2_offload)\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/p554d2sz\u001b[0m\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Traceback (most recent call last):\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/lightning_trainer.py\", line 254, in <module>\r\n",
+      "    cli_main()\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/lightning_trainer.py\", line 233, in cli_main\r\n",
+      "    LightningCLI(\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/lightning/pytorch/cli.py\", line 350, in __init__\r\n",
+      "    self.instantiate_classes()\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/lightning/pytorch/cli.py\", line 499, in instantiate_classes\r\n",
+      "    self.config_init = self.parser.instantiate_classes(self.config)\r\n",
+      "                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n",
+      "    cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n",
+      "          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_core.py\", line 1128, in instantiate_classes\r\n",
+      "    cfg[subcommand] = subparser.instantiate_classes(cfg[subcommand], instantiate_groups=instantiate_groups)\r\n",
+      "                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n",
+      "    cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n",
+      "          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_core.py\", line 1122, in instantiate_classes\r\n",
+      "    component.instantiate_class(component, cfg)\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_signatures.py\", line 551, in group_instantiate_class\r\n",
+      "    parent[key] = group.group_class(**value)\r\n",
+      "                  ^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/src/model.py\", line 553, in __init__\r\n",
+      "    raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n",
+      "ValueError: load_model file '../model/v5-hs2x-L6-D4096-E0_1-mem-ctx-1k.pth' does not exist\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5-hs2x-L6-D4096-E0.1 - Mem-Tune ctx-2k (train-ctx=2k, deepspeed_stage_2_offload)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/p554d2sz\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230824_023500-p554d2sz/logs\u001b[0m\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Start the ctx-2k memory finetune, loading the exported ctx-1k model as the starting point\n",
+    "!cd \"{TRAINER_DIR}\" && \\\n",
+    "    export WANDB_MODE=\"{WANDB_MODE}\" && \\\n",
+    "    export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
+    "    python lightning_trainer.py fit \\\n",
+    "        -c \"{NOTEBOOK_DIR}/v5base-mem-template.yaml\" \\\n",
+    "        --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Tune ctx-2k (train-ctx=2k, {DEEPSPEED_STRAT})\" \\\n",
+    "        --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n",
+    "        --trainer.devices=\"{GPU_DEVICES}\"  \\\n",
+    "        --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-mem-ctx-2k/\" \\\n",
+    "        --model.lr_init=3e-4 \\\n",
+    "        --model.lr_final=1e-4 \\\n",
+    "        --data.max_token_size=2048 \\\n",
+    "        --model.ctx_len=2048 \\\n",
+    "        --model.bptt_learning_range=1 \\\n",
+    "        --model.load_model=\"../model/{FILENAME_PREFIX}-mem-ctx-1k.pth\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "33c1137b",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:35:10.258826Z",
+     "iopub.status.busy": "2023-08-24T02:35:10.258491Z",
+     "iopub.status.idle": "2023-08-24T02:35:13.198225Z",
+     "shell.execute_reply": "2023-08-24T02:35:13.197348Z"
+    },
+    "papermill": {
+     "duration": 3.055248,
+     "end_time": "2023-08-24T02:35:13.199981",
+     "exception": false,
+     "start_time": "2023-08-24T02:35:10.144733",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Traceback (most recent call last):\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/export_checkpoint.py\", line 651, in <module>\r\n",
+      "    convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n",
+      "    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n",
+      "                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n",
+      "    raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n",
+      "ValueError: Unable to find 'latest' file at ../checkpoint/v5-hs2x-L6-D4096-E0_1-mem-ctx-2k/last.ckpt/latest\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ls: cannot access '../model/v5-hs2x-L6-D4096-E0_1-mem-ctx-2k.pth': No such file or directory\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Let's export the model from the checkpoint\n",
+    "!cd \"{TRAINER_DIR}\" && \\\n",
+    "    python export_checkpoint.py \\\n",
+    "        \"../checkpoint/{FILENAME_PREFIX}-mem-ctx-2k/last.ckpt\" \\\n",
+    "        \"../model/{FILENAME_PREFIX}-mem-ctx-2k.pth\" \"bf16\"\n",
+    "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-mem-ctx-2k.pth\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "b6f7e3df",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:35:13.425699Z",
+     "iopub.status.busy": "2023-08-24T02:35:13.424645Z",
+     "iopub.status.idle": "2023-08-24T02:35:18.170662Z",
+     "shell.execute_reply": "2023-08-24T02:35:18.169986Z"
+    },
+    "papermill": {
+     "duration": 4.859506,
+     "end_time": "2023-08-24T02:35:18.172589",
+     "exception": false,
+     "start_time": "2023-08-24T02:35:13.313083",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
+      "Traceback (most recent call last):\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/../memory_script/eval_v5_memory_guided.py\", line 366, in <module>\r\n",
+      "    asyncio.run(main_function())\r\n",
+      "  File \"/usr/lib/python3.11/asyncio/runners.py\", line 190, in run\r\n",
+      "    return runner.run(main)\r\n",
+      "           ^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/lib/python3.11/asyncio/runners.py\", line 118, in run\r\n",
+      "    return self._loop.run_until_complete(task)\r\n",
+      "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/lib/python3.11/asyncio/base_events.py\", line 653, in run_until_complete\r\n",
+      "    return future.result()\r\n",
+      "           ^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/../memory_script/eval_v5_memory_guided.py\", line 58, in main_function\r\n",
+      "    model = SimpleRWKV(model_path, device=\"cuda\")\r\n",
+      "            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 1378, in __init__\r\n",
+      "    self.model = RWKV(**model_config)\r\n",
+      "                 ^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 553, in __init__\r\n",
+      "    raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n",
+      "ValueError: load_model file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/model/v5-hs2x-L6-D4096-E0_1-mem-ctx-2k.pth' does not exist\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Let's do a quick memory test\n",
+    "!export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
+    "        python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-2k.pth\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "eb978aa9",
+   "metadata": {
+    "papermill": {
+     "duration": 0.108713,
+     "end_time": "2023-08-24T02:35:18.395275",
+     "exception": false,
+     "start_time": "2023-08-24T02:35:18.286562",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "## Tune 5 : Ramping up the ctx size (4096), memory training\n",
+    "\n",
+    "- Tune 5: Mid ctx size (4096), Scaling up!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "4dacea16",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:35:18.614238Z",
+     "iopub.status.busy": "2023-08-24T02:35:18.614056Z",
+     "iopub.status.idle": "2023-08-24T02:35:23.171762Z",
+     "shell.execute_reply": "2023-08-24T02:35:23.170488Z"
+    },
+    "papermill": {
+     "duration": 5.002702,
+     "end_time": "2023-08-24T02:35:23.506705",
+     "exception": false,
+     "start_time": "2023-08-24T02:35:18.504003",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "## Generating word reptition dataset ##\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2 max words, 100 samples - at ../dataset/word-2-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 128 samples (1 token repeat) - 20 max words - at ../dataset/shuffle-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 20 max words, 100 samples - at ../dataset/gen-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 15 max words, 100 samples - at ../dataset/gen-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 43 samples (1 token repeat) - 60 max words - at ../dataset/shuffle-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 50 max words, 100 samples - at ../dataset/gen-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 554 samples (1 token repeat) - 5 max words - at ../dataset/shuffle-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 55 max words, 100 samples - at ../dataset/gen-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 10 max words, 100 samples - at ../dataset/gen-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 110 samples (1 token repeat) - 25 max words - at ../dataset/shuffle-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 70 max words, 100 samples - at ../dataset/gen-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 263 samples (1 token repeat) - 10 max words - at ../dataset/shuffle-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 49 samples (1 token repeat) - 55 max words - at ../dataset/shuffle-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 52 samples (1 token repeat) - 50 max words - at ../dataset/shuffle-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 38 samples (1 token repeat) - 70 max words - at ../dataset/shuffle-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 35 max words, 100 samples - at ../dataset/gen-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 186 samples (1 token repeat) - 15 max words - at ../dataset/shuffle-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 45 max words, 100 samples - at ../dataset/gen-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 40 max words, 100 samples - at ../dataset/gen-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 25 max words, 100 samples - at ../dataset/gen-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5 max words, 100 samples - at ../dataset/gen-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 30 max words, 100 samples - at ../dataset/gen-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 105 max words, 100 samples - at ../dataset/gen-word-105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 95 max words, 100 samples - at ../dataset/gen-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 64 samples (1 token repeat) - 40 max words - at ../dataset/shuffle-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 355 max words - at ../dataset/shuffle-word-355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 60 max words, 100 samples - at ../dataset/gen-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 39 samples (1 token repeat) - 65 max words - at ../dataset/shuffle-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 61 samples (1 token repeat) - 45 max words - at ../dataset/shuffle-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 76 samples (1 token repeat) - 35 max words - at ../dataset/shuffle-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 88 samples (1 token repeat) - 30 max words - at ../dataset/shuffle-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 34 samples (1 token repeat) - 75 max words - at ../dataset/shuffle-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 26 samples (1 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 80 max words, 100 samples - at ../dataset/gen-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 150 max words, 100 samples - at ../dataset/gen-word-150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 18 samples (1 token repeat) - 125 max words - at ../dataset/shuffle-word-125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 100 max words, 100 samples - at ../dataset/gen-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 200 max words, 100 samples - at ../dataset/gen-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 140 max words, 100 samples - at ../dataset/gen-word-140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 17 samples (1 token repeat) - 140 max words - at ../dataset/shuffle-word-140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 31 samples (1 token repeat) - 85 max words - at ../dataset/shuffle-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 16 samples (1 token repeat) - 150 max words - at ../dataset/shuffle-word-150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 240 max words - at ../dataset/shuffle-word-240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 155 max words, 100 samples - at ../dataset/gen-word-155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 31 samples (1 token repeat) - 80 max words - at ../dataset/shuffle-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 295 max words - at ../dataset/shuffle-word-295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 275 max words, 100 samples - at ../dataset/gen-word-275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 130 max words, 100 samples - at ../dataset/gen-word-130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 170 max words, 100 samples - at ../dataset/gen-word-170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 65 max words, 100 samples - at ../dataset/gen-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 110 max words, 100 samples - at ../dataset/gen-word-110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 355 max words, 100 samples - at ../dataset/gen-word-355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 310 max words - at ../dataset/shuffle-word-310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 280 max words - at ../dataset/shuffle-word-280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 315 max words, 100 samples - at ../dataset/gen-word-315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 14 samples (1 token repeat) - 195 max words - at ../dataset/shuffle-word-195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 290 max words - at ../dataset/shuffle-word-290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 395 max words - at ../dataset/shuffle-word-395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 13 samples (1 token repeat) - 175 max words - at ../dataset/shuffle-word-175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 305 max words - at ../dataset/shuffle-word-305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 17 samples (1 token repeat) - 135 max words - at ../dataset/shuffle-word-135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 260 max words - at ../dataset/shuffle-word-260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 245 max words, 100 samples - at ../dataset/gen-word-245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 195 max words, 100 samples - at ../dataset/gen-word-195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 225 max words, 100 samples - at ../dataset/gen-word-225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 270 max words, 100 samples - at ../dataset/gen-word-270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 16 samples (1 token repeat) - 145 max words - at ../dataset/shuffle-word-145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 325 max words, 100 samples - at ../dataset/gen-word-325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 330 max words, 100 samples - at ../dataset/gen-word-330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 160 max words, 100 samples - at ../dataset/gen-word-160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 450 max words, 100 samples - at ../dataset/gen-word-450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (1 token repeat) - 110 max words - at ../dataset/shuffle-word-110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 770 max words - at ../dataset/shuffle-word-770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 375 max words - at ../dataset/shuffle-word-375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 400 max words - at ../dataset/shuffle-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 390 max words, 100 samples - at ../dataset/gen-word-390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 365 max words, 100 samples - at ../dataset/gen-word-365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 460 max words - at ../dataset/shuffle-word-460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 655 max words, 200 samples - at ../dataset/gen-word-655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 79 samples (20 token repeat) - 775 max words - at ../dataset/shuffle-word-775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 270 max words - at ../dataset/shuffle-word-270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 340 max words, 100 samples - at ../dataset/gen-word-340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1585 max words - at ../dataset/shuffle-word-1585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 410 max words - at ../dataset/shuffle-word-410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2120 max words - at ../dataset/shuffle-word-2120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 220 max words - at ../dataset/shuffle-word-220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1980 max words - at ../dataset/shuffle-word-1980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 99 samples (20 token repeat) - 600 max words - at ../dataset/shuffle-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 445 max words - at ../dataset/shuffle-word-445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 17 samples (1 token repeat) - 130 max words - at ../dataset/shuffle-word-130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 26 samples (1 token repeat) - 95 max words - at ../dataset/shuffle-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1430 max words - at ../dataset/shuffle-word-1430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 18 samples (1 token repeat) - 120 max words - at ../dataset/shuffle-word-120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2045 max words - at ../dataset/shuffle-word-2045-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 705 max words - at ../dataset/shuffle-word-705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1955 max words - at ../dataset/shuffle-word-1955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1705 max words - at ../dataset/shuffle-word-1705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 275 max words - at ../dataset/shuffle-word-275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1860 max words - at ../dataset/shuffle-word-1860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 265 max words, 100 samples - at ../dataset/gen-word-265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 410 max words, 100 samples - at ../dataset/gen-word-410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1650 max words - at ../dataset/shuffle-word-1650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 13 samples (1 token repeat) - 190 max words - at ../dataset/shuffle-word-190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 450 max words - at ../dataset/shuffle-word-450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 230 max words, 100 samples - at ../dataset/gen-word-230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 82 samples (20 token repeat) - 625 max words - at ../dataset/shuffle-word-625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 50 samples (20 token repeat) - 1225 max words - at ../dataset/shuffle-word-1225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 165 max words, 100 samples - at ../dataset/gen-word-165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 405 max words - at ../dataset/shuffle-word-405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1995 max words - at ../dataset/shuffle-word-1995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 13 samples (1 token repeat) - 185 max words - at ../dataset/shuffle-word-185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 490 max words - at ../dataset/shuffle-word-490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2135 max words - at ../dataset/shuffle-word-2135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2030 max words - at ../dataset/shuffle-word-2030-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 34 samples (20 token repeat) - 2575 max words - at ../dataset/shuffle-word-2575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 435 max words - at ../dataset/shuffle-word-435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 995 max words - at ../dataset/shuffle-word-995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 49 samples (20 token repeat) - 1210 max words - at ../dataset/shuffle-word-1210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 280 max words, 100 samples - at ../dataset/gen-word-280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2275 max words - at ../dataset/shuffle-word-2275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 145 max words, 100 samples - at ../dataset/gen-word-145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1660 max words - at ../dataset/shuffle-word-1660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1340 max words - at ../dataset/shuffle-word-1340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 650 max words - at ../dataset/shuffle-word-650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 135 max words, 100 samples - at ../dataset/gen-word-135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1885 max words - at ../dataset/shuffle-word-1885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 41 samples (20 token repeat) - 1345 max words - at ../dataset/shuffle-word-1345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1575 max words - at ../dataset/shuffle-word-1575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 245 max words - at ../dataset/shuffle-word-245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 65 samples (20 token repeat) - 860 max words - at ../dataset/shuffle-word-860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 215 max words, 100 samples - at ../dataset/gen-word-215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 465 max words, 100 samples - at ../dataset/gen-word-465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 470 max words, 100 samples - at ../dataset/gen-word-470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2190 max words - at ../dataset/shuffle-word-2190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1665 max words - at ../dataset/shuffle-word-1665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 59 samples (20 token repeat) - 1155 max words - at ../dataset/shuffle-word-1155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 31 samples (20 token repeat) - 2515 max words - at ../dataset/shuffle-word-2515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 635 max words - at ../dataset/shuffle-word-635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2365 max words - at ../dataset/shuffle-word-2365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1970 max words - at ../dataset/shuffle-word-1970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1435 max words - at ../dataset/shuffle-word-1435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1610 max words - at ../dataset/shuffle-word-1610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2290 max words - at ../dataset/shuffle-word-2290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 950 max words - at ../dataset/shuffle-word-950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1460 max words - at ../dataset/shuffle-word-1460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 98 samples (20 token repeat) - 560 max words - at ../dataset/shuffle-word-560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1270 max words, 200 samples - at ../dataset/gen-word-1270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 320 max words - at ../dataset/shuffle-word-320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 22 samples (20 token repeat) - 2675 max words - at ../dataset/shuffle-word-2675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 59 samples (20 token repeat) - 1135 max words - at ../dataset/shuffle-word-1135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 79 samples (20 token repeat) - 755 max words - at ../dataset/shuffle-word-755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 49 samples (20 token repeat) - 1235 max words - at ../dataset/shuffle-word-1235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 29 samples (20 token repeat) - 2590 max words - at ../dataset/shuffle-word-2590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 39 samples (20 token repeat) - 2310 max words - at ../dataset/shuffle-word-2310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1120 max words - at ../dataset/shuffle-word-1120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 21 samples (20 token repeat) - 2695 max words - at ../dataset/shuffle-word-2695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2770 max words - at ../dataset/shuffle-word-2770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2895 max words - at ../dataset/shuffle-word-2895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 22 samples (20 token repeat) - 2605 max words - at ../dataset/shuffle-word-2605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 295 max words, 100 samples - at ../dataset/gen-word-295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 390 max words - at ../dataset/shuffle-word-390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1055 max words - at ../dataset/shuffle-word-1055-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1015 max words - at ../dataset/shuffle-word-1015-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1465 max words - at ../dataset/shuffle-word-1465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 19 samples (1 token repeat) - 115 max words - at ../dataset/shuffle-word-115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 53 samples (20 token repeat) - 1215 max words - at ../dataset/shuffle-word-1215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 38 samples (20 token repeat) - 2490 max words - at ../dataset/shuffle-word-2490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 565 max words, 200 samples - at ../dataset/gen-word-565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1170 max words - at ../dataset/shuffle-word-1170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 54 samples (20 token repeat) - 1300 max words - at ../dataset/shuffle-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 545 max words - at ../dataset/shuffle-word-545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 31 samples (20 token repeat) - 2535 max words - at ../dataset/shuffle-word-2535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 38 samples (20 token repeat) - 2405 max words - at ../dataset/shuffle-word-2405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 710 max words - at ../dataset/shuffle-word-710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 940 max words - at ../dataset/shuffle-word-940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 205 max words, 100 samples - at ../dataset/gen-word-205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 50 samples (20 token repeat) - 1290 max words - at ../dataset/shuffle-word-1290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 52 samples (20 token repeat) - 1275 max words - at ../dataset/shuffle-word-1275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 380 max words - at ../dataset/shuffle-word-380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 725 max words, 200 samples - at ../dataset/gen-word-725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 265 max words - at ../dataset/shuffle-word-265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 125 max words, 100 samples - at ../dataset/gen-word-125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 235 max words - at ../dataset/shuffle-word-235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 980 max words, 200 samples - at ../dataset/gen-word-980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 575 max words, 200 samples - at ../dataset/gen-word-575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 435 max words, 100 samples - at ../dataset/gen-word-435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2345 max words - at ../dataset/shuffle-word-2345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1840 max words - at ../dataset/shuffle-word-1840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 315 max words - at ../dataset/shuffle-word-315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1065 max words - at ../dataset/shuffle-word-1065-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 965 max words - at ../dataset/shuffle-word-965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1390 max words - at ../dataset/shuffle-word-1390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 530 max words - at ../dataset/shuffle-word-530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1810 max words - at ../dataset/shuffle-word-1810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 42 samples (20 token repeat) - 1320 max words - at ../dataset/shuffle-word-1320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1310 max words - at ../dataset/shuffle-word-1310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 250 max words - at ../dataset/shuffle-word-250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1445 max words - at ../dataset/shuffle-word-1445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2140 max words - at ../dataset/shuffle-word-2140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 565 max words - at ../dataset/shuffle-word-565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3000 max words - at ../dataset/shuffle-word-3000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1720 max words - at ../dataset/shuffle-word-1720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 330 max words - at ../dataset/shuffle-word-330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 335 max words, 100 samples - at ../dataset/gen-word-335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1590 max words - at ../dataset/shuffle-word-1590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1265 max words, 200 samples - at ../dataset/gen-word-1265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1505 max words - at ../dataset/shuffle-word-1505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 495 max words, 100 samples - at ../dataset/gen-word-495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1815 max words - at ../dataset/shuffle-word-1815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1085 max words - at ../dataset/shuffle-word-1085-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 350 max words, 100 samples - at ../dataset/gen-word-350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 21 samples (20 token repeat) - 2735 max words - at ../dataset/shuffle-word-2735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 13 samples (1 token repeat) - 180 max words - at ../dataset/shuffle-word-180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2325 max words - at ../dataset/shuffle-word-2325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2950 max words - at ../dataset/shuffle-word-2950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2270 max words - at ../dataset/shuffle-word-2270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 910 max words - at ../dataset/shuffle-word-910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1620 max words - at ../dataset/shuffle-word-1620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 53 samples (20 token repeat) - 1265 max words - at ../dataset/shuffle-word-1265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2000 max words - at ../dataset/shuffle-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 14 samples (1 token repeat) - 165 max words - at ../dataset/shuffle-word-165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1975 max words - at ../dataset/shuffle-word-1975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1095 max words - at ../dataset/shuffle-word-1095-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2915 max words - at ../dataset/shuffle-word-2915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 340 max words - at ../dataset/shuffle-word-340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 285 max words - at ../dataset/shuffle-word-285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2710 max words - at ../dataset/shuffle-word-2710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 345 max words - at ../dataset/shuffle-word-345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1045 max words - at ../dataset/shuffle-word-1045-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2855 max words - at ../dataset/shuffle-word-2855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1825 max words - at ../dataset/shuffle-word-1825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1330 max words - at ../dataset/shuffle-word-1330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 53 samples (20 token repeat) - 1260 max words - at ../dataset/shuffle-word-1260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 41 samples (20 token repeat) - 1325 max words - at ../dataset/shuffle-word-1325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1445 max words, 200 samples - at ../dataset/gen-word-1445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1500 max words - at ../dataset/shuffle-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1100 max words - at ../dataset/shuffle-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 615 max words, 200 samples - at ../dataset/gen-word-615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1030 max words - at ../dataset/shuffle-word-1030-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3280 max words - at ../dataset/shuffle-word-3280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 300 max words, 100 samples - at ../dataset/gen-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 460 max words, 100 samples - at ../dataset/gen-word-460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2975 max words - at ../dataset/shuffle-word-2975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2850 max words - at ../dataset/shuffle-word-2850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 210 max words, 100 samples - at ../dataset/gen-word-210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2080 max words - at ../dataset/shuffle-word-2080-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 21 samples (1 token repeat) - 105 max words - at ../dataset/shuffle-word-105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2925 max words - at ../dataset/shuffle-word-2925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1245 max words, 200 samples - at ../dataset/gen-word-1245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1830 max words - at ../dataset/shuffle-word-1830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1035 max words - at ../dataset/shuffle-word-1035-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 21 samples (20 token repeat) - 2665 max words - at ../dataset/shuffle-word-2665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1580 max words - at ../dataset/shuffle-word-1580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3315 max words - at ../dataset/shuffle-word-3315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1380 max words, 200 samples - at ../dataset/gen-word-1380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 325 max words - at ../dataset/shuffle-word-325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2780 max words - at ../dataset/shuffle-word-2780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2835 max words - at ../dataset/shuffle-word-2835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2195 max words - at ../dataset/shuffle-word-2195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 22 samples (20 token repeat) - 2740 max words - at ../dataset/shuffle-word-2740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2820 max words - at ../dataset/shuffle-word-2820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1590 max words, 200 samples - at ../dataset/gen-word-1590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 915 max words - at ../dataset/shuffle-word-915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2340 max words - at ../dataset/shuffle-word-2340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1700 max words - at ../dataset/shuffle-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 370 max words - at ../dataset/shuffle-word-370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 25 samples (20 token repeat) - 2660 max words - at ../dataset/shuffle-word-2660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1365 max words - at ../dataset/shuffle-word-1365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 25 samples (20 token repeat) - 2610 max words - at ../dataset/shuffle-word-2610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 520 max words, 200 samples - at ../dataset/gen-word-520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2200 max words - at ../dataset/shuffle-word-2200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2100 max words - at ../dataset/shuffle-word-2100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1260 max words, 200 samples - at ../dataset/gen-word-1260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1210 max words, 200 samples - at ../dataset/gen-word-1210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3360 max words - at ../dataset/shuffle-word-3360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 14 samples (1 token repeat) - 170 max words - at ../dataset/shuffle-word-170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2335 max words, 200 samples - at ../dataset/gen-word-2335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3795 max words - at ../dataset/shuffle-word-3795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3565 max words - at ../dataset/shuffle-word-3565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2745 max words - at ../dataset/shuffle-word-2745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1875 max words - at ../dataset/shuffle-word-1875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3645 max words - at ../dataset/shuffle-word-3645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1255 max words, 200 samples - at ../dataset/gen-word-1255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3365 max words - at ../dataset/shuffle-word-3365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2730 max words - at ../dataset/shuffle-word-2730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3370 max words - at ../dataset/shuffle-word-3370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 800 max words, 200 samples - at ../dataset/gen-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 22 samples (20 token repeat) - 2615 max words - at ../dataset/shuffle-word-2615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3045 max words - at ../dataset/shuffle-word-3045-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 375 max words, 100 samples - at ../dataset/gen-word-375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 805 max words, 200 samples - at ../dataset/gen-word-805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3015 max words - at ../dataset/shuffle-word-3015-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 730 max words - at ../dataset/shuffle-word-730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 21 samples (20 token repeat) - 2680 max words - at ../dataset/shuffle-word-2680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 78 samples (20 token repeat) - 740 max words - at ../dataset/shuffle-word-740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1235 max words, 200 samples - at ../dataset/gen-word-1235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2865 max words - at ../dataset/shuffle-word-2865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1555 max words - at ../dataset/shuffle-word-1555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3660 max words - at ../dataset/shuffle-word-3660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 31 samples (20 token repeat) - 2570 max words - at ../dataset/shuffle-word-2570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3230 max words - at ../dataset/shuffle-word-3230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2830 max words - at ../dataset/shuffle-word-2830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1240 max words, 200 samples - at ../dataset/gen-word-1240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1715 max words - at ../dataset/shuffle-word-1715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2205 max words - at ../dataset/shuffle-word-2205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1865 max words - at ../dataset/shuffle-word-1865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2125 max words - at ../dataset/shuffle-word-2125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 82 samples (20 token repeat) - 655 max words - at ../dataset/shuffle-word-655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2375 max words - at ../dataset/shuffle-word-2375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1775 max words - at ../dataset/shuffle-word-1775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1280 max words, 200 samples - at ../dataset/gen-word-1280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2230 max words - at ../dataset/shuffle-word-2230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 27 samples (20 token repeat) - 2520 max words - at ../dataset/shuffle-word-2520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3255 max words - at ../dataset/shuffle-word-3255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 37 samples (20 token repeat) - 2500 max words - at ../dataset/shuffle-word-2500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2155 max words - at ../dataset/shuffle-word-2155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 995 max words, 200 samples - at ../dataset/gen-word-995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 845 max words, 200 samples - at ../dataset/gen-word-845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1910 max words, 200 samples - at ../dataset/gen-word-1910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 535 max words, 200 samples - at ../dataset/gen-word-535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3520 max words - at ../dataset/shuffle-word-3520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 880 max words, 200 samples - at ../dataset/gen-word-880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 25 samples (20 token repeat) - 2620 max words - at ../dataset/shuffle-word-2620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 255 max words - at ../dataset/shuffle-word-255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2985 max words - at ../dataset/shuffle-word-2985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2270 max words, 200 samples - at ../dataset/gen-word-2270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 175 max words, 100 samples - at ../dataset/gen-word-175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 620 max words, 200 samples - at ../dataset/gen-word-620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3790 max words - at ../dataset/shuffle-word-3790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 30 samples (20 token repeat) - 2600 max words - at ../dataset/shuffle-word-2600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3675 max words - at ../dataset/shuffle-word-3675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3285 max words - at ../dataset/shuffle-word-3285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3840 max words - at ../dataset/shuffle-word-3840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 745 max words - at ../dataset/shuffle-word-745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1040 max words, 200 samples - at ../dataset/gen-word-1040-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1765 max words - at ../dataset/shuffle-word-1765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3690 max words - at ../dataset/shuffle-word-3690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3325 max words - at ../dataset/shuffle-word-3325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 290 max words, 100 samples - at ../dataset/gen-word-290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3450 max words - at ../dataset/shuffle-word-3450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3755 max words - at ../dataset/shuffle-word-3755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1230 max words, 200 samples - at ../dataset/gen-word-1230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 720 max words - at ../dataset/shuffle-word-720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2285 max words - at ../dataset/shuffle-word-2285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 90 max words, 100 samples - at ../dataset/gen-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3295 max words - at ../dataset/shuffle-word-3295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3900 max words - at ../dataset/shuffle-word-3900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 39 samples (20 token repeat) - 2495 max words - at ../dataset/shuffle-word-2495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3340 max words - at ../dataset/shuffle-word-3340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1180 max words, 200 samples - at ../dataset/gen-word-1180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 305 max words, 100 samples - at ../dataset/gen-word-305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3970 max words - at ../dataset/shuffle-word-3970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3820 max words - at ../dataset/shuffle-word-3820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 26 samples (20 token repeat) - 2580 max words - at ../dataset/shuffle-word-2580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 695 max words - at ../dataset/shuffle-word-695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 250 max words, 100 samples - at ../dataset/gen-word-250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 890 max words, 200 samples - at ../dataset/gen-word-890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 210 max words - at ../dataset/shuffle-word-210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3335 max words - at ../dataset/shuffle-word-3335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3705 max words - at ../dataset/shuffle-word-3705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 480 max words, 100 samples - at ../dataset/gen-word-480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 99 samples (20 token repeat) - 590 max words - at ../dataset/shuffle-word-590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2910 max words - at ../dataset/shuffle-word-2910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 385 max words - at ../dataset/shuffle-word-385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 21 samples (20 token repeat) - 2765 max words - at ../dataset/shuffle-word-2765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 540 max words - at ../dataset/shuffle-word-540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3070 max words - at ../dataset/shuffle-word-3070-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 33 samples (20 token repeat) - 2530 max words - at ../dataset/shuffle-word-2530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1615 max words - at ../dataset/shuffle-word-1615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2900 max words - at ../dataset/shuffle-word-2900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 32 samples (20 token repeat) - 2565 max words - at ../dataset/shuffle-word-2565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3075 max words - at ../dataset/shuffle-word-3075-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 300 max words - at ../dataset/shuffle-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1200 max words, 200 samples - at ../dataset/gen-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 775 max words, 200 samples - at ../dataset/gen-word-775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 735 max words - at ../dataset/shuffle-word-735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2115 max words - at ../dataset/shuffle-word-2115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3470 max words - at ../dataset/shuffle-word-3470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2335 max words - at ../dataset/shuffle-word-2335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3125 max words - at ../dataset/shuffle-word-3125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 425 max words, 100 samples - at ../dataset/gen-word-425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1165 max words, 200 samples - at ../dataset/gen-word-1165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2360 max words - at ../dataset/shuffle-word-2360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2365 max words, 200 samples - at ../dataset/gen-word-2365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 730 max words, 200 samples - at ../dataset/gen-word-730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 480 max words - at ../dataset/shuffle-word-480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3240 max words - at ../dataset/shuffle-word-3240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3170 max words - at ../dataset/shuffle-word-3170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1305 max words, 200 samples - at ../dataset/gen-word-1305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1170 max words, 200 samples - at ../dataset/gen-word-1170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3720 max words - at ../dataset/shuffle-word-3720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3265 max words - at ../dataset/shuffle-word-3265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 13 samples (1 token repeat) - 200 max words - at ../dataset/shuffle-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2420 max words, 200 samples - at ../dataset/gen-word-2420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 63 samples (20 token repeat) - 880 max words - at ../dataset/shuffle-word-880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3995 max words - at ../dataset/shuffle-word-3995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 11 samples (1 token repeat) - 205 max words - at ../dataset/shuffle-word-205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 510 max words - at ../dataset/shuffle-word-510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3635 max words - at ../dataset/shuffle-word-3635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3700 max words - at ../dataset/shuffle-word-3700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3575 max words - at ../dataset/shuffle-word-3575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3955 max words - at ../dataset/shuffle-word-3955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 215 max words - at ../dataset/shuffle-word-215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 790 max words, 200 samples - at ../dataset/gen-word-790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 230 max words - at ../dataset/shuffle-word-230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3600 max words - at ../dataset/shuffle-word-3600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3500 max words - at ../dataset/shuffle-word-3500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3245 max words - at ../dataset/shuffle-word-3245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1250 max words, 200 samples - at ../dataset/gen-word-1250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 360 max words, 100 samples - at ../dataset/gen-word-360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 915 max words, 200 samples - at ../dataset/gen-word-915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1015 max words, 200 samples - at ../dataset/gen-word-1015-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 35 samples (20 token repeat) - 2430 max words - at ../dataset/shuffle-word-2430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3480 max words - at ../dataset/shuffle-word-3480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3485 max words - at ../dataset/shuffle-word-3485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3205 max words - at ../dataset/shuffle-word-3205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2090 max words - at ../dataset/shuffle-word-2090-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 470 max words - at ../dataset/shuffle-word-470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3465 max words - at ../dataset/shuffle-word-3465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 190 max words, 100 samples - at ../dataset/gen-word-190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 510 max words, 200 samples - at ../dataset/gen-word-510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3655 max words - at ../dataset/shuffle-word-3655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2695 max words, 200 samples - at ../dataset/gen-word-2695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 555 max words, 200 samples - at ../dataset/gen-word-555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1935 max words - at ../dataset/shuffle-word-1935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3510 max words - at ../dataset/shuffle-word-3510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 570 max words - at ../dataset/shuffle-word-570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 255 max words, 100 samples - at ../dataset/gen-word-255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1635 max words, 200 samples - at ../dataset/gen-word-1635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3430 max words - at ../dataset/shuffle-word-3430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3455 max words - at ../dataset/shuffle-word-3455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 515 max words - at ../dataset/shuffle-word-515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 440 max words - at ../dataset/shuffle-word-440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1620 max words, 200 samples - at ../dataset/gen-word-1620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 79 samples (20 token repeat) - 765 max words - at ../dataset/shuffle-word-765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 715 max words - at ../dataset/shuffle-word-715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 350 max words - at ../dataset/shuffle-word-350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2775 max words - at ../dataset/shuffle-word-2775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 385 max words, 100 samples - at ../dataset/gen-word-385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 700 max words, 200 samples - at ../dataset/gen-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1025 max words, 200 samples - at ../dataset/gen-word-1025-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 25 samples (20 token repeat) - 2630 max words - at ../dataset/shuffle-word-2630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1290 max words, 200 samples - at ../dataset/gen-word-1290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 535 max words - at ../dataset/shuffle-word-535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 420 max words - at ../dataset/shuffle-word-420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1030 max words, 200 samples - at ../dataset/gen-word-1030-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3610 max words - at ../dataset/shuffle-word-3610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2905 max words - at ../dataset/shuffle-word-2905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 475 max words, 100 samples - at ../dataset/gen-word-475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2405 max words, 200 samples - at ../dataset/gen-word-2405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 21 samples (20 token repeat) - 2725 max words - at ../dataset/shuffle-word-2725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 465 max words - at ../dataset/shuffle-word-465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 81 samples (20 token repeat) - 610 max words - at ../dataset/shuffle-word-610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1345 max words, 200 samples - at ../dataset/gen-word-1345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3710 max words - at ../dataset/shuffle-word-3710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 735 max words, 200 samples - at ../dataset/gen-word-735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 555 max words - at ../dataset/shuffle-word-555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1090 max words, 200 samples - at ../dataset/gen-word-1090-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3035 max words - at ../dataset/shuffle-word-3035-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1465 max words, 200 samples - at ../dataset/gen-word-1465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3595 max words - at ../dataset/shuffle-word-3595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 840 max words, 200 samples - at ../dataset/gen-word-840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 63 samples (20 token repeat) - 830 max words - at ../dataset/shuffle-word-830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 85 max words, 100 samples - at ../dataset/gen-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3445 max words - at ../dataset/shuffle-word-3445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1155 max words, 200 samples - at ../dataset/gen-word-1155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 79 samples (20 token repeat) - 790 max words - at ../dataset/shuffle-word-790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 260 max words, 100 samples - at ../dataset/gen-word-260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 500 max words - at ../dataset/shuffle-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 680 max words - at ../dataset/shuffle-word-680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1150 max words, 200 samples - at ../dataset/gen-word-1150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 620 max words - at ../dataset/shuffle-word-620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 485 max words - at ../dataset/shuffle-word-485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 430 max words - at ../dataset/shuffle-word-430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3580 max words - at ../dataset/shuffle-word-3580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 505 max words - at ../dataset/shuffle-word-505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3190 max words - at ../dataset/shuffle-word-3190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1880 max words - at ../dataset/shuffle-word-1880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 745 max words, 200 samples - at ../dataset/gen-word-745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 15 samples (1 token repeat) - 160 max words - at ../dataset/shuffle-word-160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3385 max words - at ../dataset/shuffle-word-3385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1400 max words, 200 samples - at ../dataset/gen-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1895 max words, 200 samples - at ../dataset/gen-word-1895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 455 max words, 100 samples - at ../dataset/gen-word-455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3570 max words - at ../dataset/shuffle-word-3570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1890 max words, 200 samples - at ../dataset/gen-word-1890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 520 max words - at ../dataset/shuffle-word-520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 885 max words, 200 samples - at ../dataset/gen-word-885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3090 max words - at ../dataset/shuffle-word-3090-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 505 max words, 200 samples - at ../dataset/gen-word-505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 760 max words - at ../dataset/shuffle-word-760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3910 max words - at ../dataset/shuffle-word-3910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 27 samples (20 token repeat) - 2635 max words - at ../dataset/shuffle-word-2635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3105 max words - at ../dataset/shuffle-word-3105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3950 max words - at ../dataset/shuffle-word-3950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 860 max words, 200 samples - at ../dataset/gen-word-860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3605 max words - at ../dataset/shuffle-word-3605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1225 max words, 200 samples - at ../dataset/gen-word-1225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 930 max words - at ../dataset/shuffle-word-930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3785 max words - at ../dataset/shuffle-word-3785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 780 max words, 200 samples - at ../dataset/gen-word-780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 63 samples (20 token repeat) - 850 max words - at ../dataset/shuffle-word-850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3060 max words - at ../dataset/shuffle-word-3060-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 950 max words, 200 samples - at ../dataset/gen-word-950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3565 max words, 200 samples - at ../dataset/gen-word-3565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2850 max words, 200 samples - at ../dataset/gen-word-2850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1220 max words, 200 samples - at ../dataset/gen-word-1220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1770 max words, 200 samples - at ../dataset/gen-word-1770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2210 max words, 200 samples - at ../dataset/gen-word-2210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 79 samples (20 token repeat) - 795 max words - at ../dataset/shuffle-word-795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1810 max words, 200 samples - at ../dataset/gen-word-1810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 15 samples (1 token repeat) - 155 max words - at ../dataset/shuffle-word-155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 64 samples (20 token repeat) - 835 max words - at ../dataset/shuffle-word-835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 65 samples (20 token repeat) - 820 max words - at ../dataset/shuffle-word-820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 645 max words - at ../dataset/shuffle-word-645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2965 max words - at ../dataset/shuffle-word-2965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2410 max words, 200 samples - at ../dataset/gen-word-2410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3895 max words - at ../dataset/shuffle-word-3895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 22 samples (20 token repeat) - 2690 max words - at ../dataset/shuffle-word-2690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1190 max words, 200 samples - at ../dataset/gen-word-1190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 980 max words - at ../dataset/shuffle-word-980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1285 max words, 200 samples - at ../dataset/gen-word-1285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 660 max words, 200 samples - at ../dataset/gen-word-660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1860 max words, 200 samples - at ../dataset/gen-word-1860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1360 max words, 200 samples - at ../dataset/gen-word-1360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1385 max words, 200 samples - at ../dataset/gen-word-1385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2880 max words - at ../dataset/shuffle-word-2880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3100 max words - at ../dataset/shuffle-word-3100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 910 max words, 200 samples - at ../dataset/gen-word-910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1425 max words, 200 samples - at ../dataset/gen-word-1425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1715 max words, 200 samples - at ../dataset/gen-word-1715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 34 samples (20 token repeat) - 2560 max words - at ../dataset/shuffle-word-2560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3530 max words - at ../dataset/shuffle-word-3530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 605 max words, 200 samples - at ../dataset/gen-word-605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3945 max words - at ../dataset/shuffle-word-3945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1100 max words, 200 samples - at ../dataset/gen-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3925 max words - at ../dataset/shuffle-word-3925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1640 max words, 200 samples - at ../dataset/gen-word-1640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1695 max words - at ../dataset/shuffle-word-1695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 360 max words - at ../dataset/shuffle-word-360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3505 max words - at ../dataset/shuffle-word-3505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 38 samples (20 token repeat) - 2475 max words - at ../dataset/shuffle-word-2475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 285 max words, 100 samples - at ../dataset/gen-word-285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3490 max words - at ../dataset/shuffle-word-3490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 66 samples (20 token repeat) - 900 max words - at ../dataset/shuffle-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3495 max words - at ../dataset/shuffle-word-3495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1455 max words, 200 samples - at ../dataset/gen-word-1455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3535 max words - at ../dataset/shuffle-word-3535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3630 max words - at ../dataset/shuffle-word-3630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 240 max words, 100 samples - at ../dataset/gen-word-240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1630 max words, 200 samples - at ../dataset/gen-word-1630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 64 samples (20 token repeat) - 815 max words - at ../dataset/shuffle-word-815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1765 max words, 200 samples - at ../dataset/gen-word-1765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 220 max words, 100 samples - at ../dataset/gen-word-220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 785 max words, 200 samples - at ../dataset/gen-word-785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2380 max words - at ../dataset/shuffle-word-2380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2920 max words - at ../dataset/shuffle-word-2920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 64 samples (20 token repeat) - 845 max words - at ../dataset/shuffle-word-845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 475 max words - at ../dataset/shuffle-word-475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 335 max words - at ../dataset/shuffle-word-335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1080 max words, 200 samples - at ../dataset/gen-word-1080-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2485 max words - at ../dataset/shuffle-word-2485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 570 max words, 200 samples - at ../dataset/gen-word-570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 675 max words - at ../dataset/shuffle-word-675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3320 max words, 200 samples - at ../dataset/gen-word-3320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1005 max words - at ../dataset/shuffle-word-1005-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 660 max words - at ../dataset/shuffle-word-660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1945 max words, 200 samples - at ../dataset/gen-word-1945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 115 max words, 100 samples - at ../dataset/gen-word-115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3590 max words - at ../dataset/shuffle-word-3590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1660 max words, 200 samples - at ../dataset/gen-word-1660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3200 max words - at ../dataset/shuffle-word-3200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 235 max words, 100 samples - at ../dataset/gen-word-235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 940 max words, 200 samples - at ../dataset/gen-word-940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1960 max words, 200 samples - at ../dataset/gen-word-1960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2075 max words, 200 samples - at ../dataset/gen-word-2075-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1545 max words, 200 samples - at ../dataset/gen-word-1545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 58 samples (20 token repeat) - 1150 max words - at ../dataset/shuffle-word-1150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 525 max words - at ../dataset/shuffle-word-525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 395 max words, 100 samples - at ../dataset/gen-word-395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 740 max words, 200 samples - at ../dataset/gen-word-740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1735 max words, 200 samples - at ../dataset/gen-word-1735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 380 max words, 100 samples - at ../dataset/gen-word-380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2880 max words, 200 samples - at ../dataset/gen-word-2880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 595 max words - at ../dataset/shuffle-word-595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 985 max words - at ../dataset/shuffle-word-985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2125 max words, 200 samples - at ../dataset/gen-word-2125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 500 max words, 100 samples - at ../dataset/gen-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2955 max words - at ../dataset/shuffle-word-2955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 64 samples (20 token repeat) - 885 max words - at ../dataset/shuffle-word-885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2940 max words - at ../dataset/shuffle-word-2940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 430 max words, 100 samples - at ../dataset/gen-word-430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1080 max words - at ../dataset/shuffle-word-1080-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1665 max words, 200 samples - at ../dataset/gen-word-1665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3275 max words, 200 samples - at ../dataset/gen-word-3275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1460 max words, 200 samples - at ../dataset/gen-word-1460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 525 max words, 200 samples - at ../dataset/gen-word-525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2160 max words, 200 samples - at ../dataset/gen-word-2160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3425 max words - at ../dataset/shuffle-word-3425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 81 samples (20 token repeat) - 670 max words - at ../dataset/shuffle-word-670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1145 max words - at ../dataset/shuffle-word-1145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 64 samples (20 token repeat) - 840 max words - at ../dataset/shuffle-word-840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 485 max words, 100 samples - at ../dataset/gen-word-485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 415 max words, 100 samples - at ../dataset/gen-word-415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 62 samples (20 token repeat) - 805 max words - at ../dataset/shuffle-word-805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1950 max words, 200 samples - at ../dataset/gen-word-1950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 540 max words, 200 samples - at ../dataset/gen-word-540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 665 max words, 200 samples - at ../dataset/gen-word-665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 635 max words, 200 samples - at ../dataset/gen-word-635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3620 max words - at ../dataset/shuffle-word-3620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 920 max words - at ../dataset/shuffle-word-920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 955 max words - at ../dataset/shuffle-word-955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2295 max words, 200 samples - at ../dataset/gen-word-2295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 855 max words, 200 samples - at ../dataset/gen-word-855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 640 max words, 200 samples - at ../dataset/gen-word-640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 720 max words, 200 samples - at ../dataset/gen-word-720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 365 max words - at ../dataset/shuffle-word-365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 39 samples (20 token repeat) - 2330 max words - at ../dataset/shuffle-word-2330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 59 samples (20 token repeat) - 1110 max words - at ../dataset/shuffle-word-1110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1300 max words, 200 samples - at ../dataset/gen-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3405 max words - at ../dataset/shuffle-word-3405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1130 max words, 200 samples - at ../dataset/gen-word-1130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 59 samples (20 token repeat) - 1105 max words - at ../dataset/shuffle-word-1105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1040 max words - at ../dataset/shuffle-word-1040-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 960 max words - at ../dataset/shuffle-word-960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2875 max words - at ../dataset/shuffle-word-2875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 82 samples (20 token repeat) - 605 max words - at ../dataset/shuffle-word-605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 66 samples (20 token repeat) - 855 max words - at ../dataset/shuffle-word-855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1900 max words, 200 samples - at ../dataset/gen-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1160 max words - at ../dataset/shuffle-word-1160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1670 max words, 200 samples - at ../dataset/gen-word-1670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 810 max words, 200 samples - at ../dataset/gen-word-810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 66 samples (20 token repeat) - 825 max words - at ../dataset/shuffle-word-825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 99 samples (20 token repeat) - 585 max words - at ../dataset/shuffle-word-585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 425 max words - at ../dataset/shuffle-word-425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 52 samples (20 token repeat) - 1255 max words - at ../dataset/shuffle-word-1255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 63 samples (20 token repeat) - 870 max words - at ../dataset/shuffle-word-870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 785 max words - at ../dataset/shuffle-word-785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1530 max words, 200 samples - at ../dataset/gen-word-1530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 815 max words, 200 samples - at ../dataset/gen-word-815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1010 max words - at ../dataset/shuffle-word-1010-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 630 max words, 200 samples - at ../dataset/gen-word-630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 665 max words - at ../dataset/shuffle-word-665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 99 samples (20 token repeat) - 550 max words - at ../dataset/shuffle-word-550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3680 max words - at ../dataset/shuffle-word-3680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1850 max words, 200 samples - at ../dataset/gen-word-1850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1940 max words - at ../dataset/shuffle-word-1940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 64 samples (20 token repeat) - 865 max words - at ../dataset/shuffle-word-865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 895 max words, 200 samples - at ../dataset/gen-word-895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1075 max words - at ../dataset/shuffle-word-1075-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 945 max words - at ../dataset/shuffle-word-945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 63 samples (20 token repeat) - 890 max words - at ../dataset/shuffle-word-890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 420 max words, 100 samples - at ../dataset/gen-word-420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 990 max words - at ../dataset/shuffle-word-990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3025 max words - at ../dataset/shuffle-word-3025-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2230 max words, 200 samples - at ../dataset/gen-word-2230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 750 max words - at ../dataset/shuffle-word-750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 710 max words, 200 samples - at ../dataset/gen-word-710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 225 max words - at ../dataset/shuffle-word-225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 415 max words - at ../dataset/shuffle-word-415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1060 max words - at ../dataset/shuffle-word-1060-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 640 max words - at ../dataset/shuffle-word-640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 550 max words, 200 samples - at ../dataset/gen-word-550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 49 samples (20 token repeat) - 1205 max words - at ../dataset/shuffle-word-1205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2240 max words, 200 samples - at ../dataset/gen-word-2240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 935 max words - at ../dataset/shuffle-word-935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 59 samples (20 token repeat) - 1140 max words - at ../dataset/shuffle-word-1140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1180 max words - at ../dataset/shuffle-word-1180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2355 max words, 200 samples - at ../dataset/gen-word-2355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1090 max words - at ../dataset/shuffle-word-1090-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1975 max words, 200 samples - at ../dataset/gen-word-1975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 370 max words, 100 samples - at ../dataset/gen-word-370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 54 samples (20 token repeat) - 1220 max words - at ../dataset/shuffle-word-1220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2500 max words, 200 samples - at ../dataset/gen-word-2500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 65 samples (20 token repeat) - 875 max words - at ../dataset/shuffle-word-875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 310 max words, 100 samples - at ../dataset/gen-word-310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2250 max words, 200 samples - at ../dataset/gen-word-2250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 62 samples (20 token repeat) - 810 max words - at ../dataset/shuffle-word-810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2390 max words, 200 samples - at ../dataset/gen-word-2390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1125 max words - at ../dataset/shuffle-word-1125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1375 max words, 200 samples - at ../dataset/gen-word-1375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 79 samples (20 token repeat) - 780 max words - at ../dataset/shuffle-word-780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1000 max words - at ../dataset/shuffle-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 455 max words - at ../dataset/shuffle-word-455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1115 max words - at ../dataset/shuffle-word-1115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1000 max words, 200 samples - at ../dataset/gen-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 180 max words, 100 samples - at ../dataset/gen-word-180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 675 max words, 200 samples - at ../dataset/gen-word-675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 405 max words, 100 samples - at ../dataset/gen-word-405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1195 max words - at ../dataset/shuffle-word-1195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1570 max words, 200 samples - at ../dataset/gen-word-1570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 515 max words, 200 samples - at ../dataset/gen-word-515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2545 max words, 200 samples - at ../dataset/gen-word-2545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 120 max words, 100 samples - at ../dataset/gen-word-120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3860 max words - at ../dataset/shuffle-word-3860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 185 max words, 100 samples - at ../dataset/gen-word-185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1870 max words, 200 samples - at ../dataset/gen-word-1870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3110 max words - at ../dataset/shuffle-word-3110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1020 max words, 200 samples - at ../dataset/gen-word-1020-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3460 max words - at ../dataset/shuffle-word-3460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 53 samples (20 token repeat) - 1230 max words - at ../dataset/shuffle-word-1230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2845 max words - at ../dataset/shuffle-word-2845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 835 max words, 200 samples - at ../dataset/gen-word-835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1800 max words, 200 samples - at ../dataset/gen-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 30 samples (1 token repeat) - 90 max words - at ../dataset/shuffle-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 81 samples (20 token repeat) - 700 max words - at ../dataset/shuffle-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 81 samples (20 token repeat) - 685 max words - at ../dataset/shuffle-word-685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 51 samples (20 token repeat) - 1240 max words - at ../dataset/shuffle-word-1240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 590 max words, 200 samples - at ../dataset/gen-word-590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2980 max words - at ../dataset/shuffle-word-2980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2100 max words, 200 samples - at ../dataset/gen-word-2100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 795 max words, 200 samples - at ../dataset/gen-word-795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1515 max words - at ../dataset/shuffle-word-1515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 800 max words - at ../dataset/shuffle-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2960 max words, 200 samples - at ../dataset/gen-word-2960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1075 max words, 200 samples - at ../dataset/gen-word-1075-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 970 max words, 200 samples - at ../dataset/gen-word-970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2225 max words, 200 samples - at ../dataset/gen-word-2225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1990 max words, 200 samples - at ../dataset/gen-word-1990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 830 max words, 200 samples - at ../dataset/gen-word-830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2245 max words, 200 samples - at ../dataset/gen-word-2245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 320 max words, 100 samples - at ../dataset/gen-word-320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1070 max words - at ../dataset/shuffle-word-1070-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 690 max words, 200 samples - at ../dataset/gen-word-690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 81 samples (20 token repeat) - 615 max words - at ../dataset/shuffle-word-615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2505 max words, 200 samples - at ../dataset/gen-word-2505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 39 samples (20 token repeat) - 2320 max words - at ../dataset/shuffle-word-2320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 58 samples (20 token repeat) - 1190 max words - at ../dataset/shuffle-word-1190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 495 max words - at ../dataset/shuffle-word-495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 345 max words, 100 samples - at ../dataset/gen-word-345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3010 max words, 200 samples - at ../dataset/gen-word-3010-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 63 samples (20 token repeat) - 895 max words - at ../dataset/shuffle-word-895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3155 max words - at ../dataset/shuffle-word-3155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1125 max words, 200 samples - at ../dataset/gen-word-1125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 975 max words - at ../dataset/shuffle-word-975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 630 max words - at ../dataset/shuffle-word-630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 400 max words, 100 samples - at ../dataset/gen-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1595 max words - at ../dataset/shuffle-word-1595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1020 max words - at ../dataset/shuffle-word-1020-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1165 max words - at ../dataset/shuffle-word-1165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2140 max words, 200 samples - at ../dataset/gen-word-2140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2970 max words - at ../dataset/shuffle-word-2970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1175 max words - at ../dataset/shuffle-word-1175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 770 max words, 200 samples - at ../dataset/gen-word-770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 865 max words, 200 samples - at ../dataset/gen-word-865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 99 samples (20 token repeat) - 580 max words - at ../dataset/shuffle-word-580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2200 max words, 200 samples - at ../dataset/gen-word-2200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 79 samples (20 token repeat) - 725 max words - at ../dataset/shuffle-word-725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1985 max words - at ../dataset/shuffle-word-1985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 59 samples (20 token repeat) - 1185 max words - at ../dataset/shuffle-word-1185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 925 max words - at ../dataset/shuffle-word-925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3400 max words - at ../dataset/shuffle-word-3400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3805 max words - at ../dataset/shuffle-word-3805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3615 max words - at ../dataset/shuffle-word-3615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 59 samples (20 token repeat) - 1130 max words - at ../dataset/shuffle-word-1130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 575 max words - at ../dataset/shuffle-word-575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3040 max words - at ../dataset/shuffle-word-3040-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 905 max words - at ../dataset/shuffle-word-905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3090 max words, 200 samples - at ../dataset/gen-word-3090-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 445 max words, 100 samples - at ../dataset/gen-word-445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1430 max words, 200 samples - at ../dataset/gen-word-1430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 490 max words, 100 samples - at ../dataset/gen-word-490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2115 max words, 200 samples - at ../dataset/gen-word-2115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 970 max words - at ../dataset/shuffle-word-970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 585 max words, 200 samples - at ../dataset/gen-word-585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2065 max words - at ../dataset/shuffle-word-2065-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1655 max words - at ../dataset/shuffle-word-1655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 850 max words, 200 samples - at ../dataset/gen-word-850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 545 max words, 200 samples - at ../dataset/gen-word-545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 925 max words, 200 samples - at ../dataset/gen-word-925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1025 max words - at ../dataset/shuffle-word-1025-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1050 max words - at ../dataset/shuffle-word-1050-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 715 max words, 200 samples - at ../dataset/gen-word-715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 755 max words, 200 samples - at ../dataset/gen-word-755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 52 samples (20 token repeat) - 1295 max words - at ../dataset/shuffle-word-1295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1480 max words - at ../dataset/shuffle-word-1480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2085 max words, 200 samples - at ../dataset/gen-word-2085-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1570 max words - at ../dataset/shuffle-word-1570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1525 max words - at ../dataset/shuffle-word-1525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1935 max words, 200 samples - at ../dataset/gen-word-1935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 825 max words, 200 samples - at ../dataset/gen-word-825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 705 max words, 200 samples - at ../dataset/gen-word-705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2635 max words, 200 samples - at ../dataset/gen-word-2635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 41 samples (20 token repeat) - 1335 max words - at ../dataset/shuffle-word-1335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1535 max words - at ../dataset/shuffle-word-1535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 870 max words, 200 samples - at ../dataset/gen-word-870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1605 max words - at ../dataset/shuffle-word-1605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 42 samples (20 token repeat) - 1305 max words - at ../dataset/shuffle-word-1305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1450 max words - at ../dataset/shuffle-word-1450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 59 samples (20 token repeat) - 1200 max words - at ../dataset/shuffle-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2150 max words, 200 samples - at ../dataset/gen-word-2150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1725 max words - at ../dataset/shuffle-word-1725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 680 max words, 200 samples - at ../dataset/gen-word-680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3280 max words, 200 samples - at ../dataset/gen-word-3280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 875 max words, 200 samples - at ../dataset/gen-word-875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 41 samples (20 token repeat) - 1355 max words - at ../dataset/shuffle-word-1355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 960 max words, 200 samples - at ../dataset/gen-word-960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1415 max words - at ../dataset/shuffle-word-1415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 41 samples (20 token repeat) - 1375 max words - at ../dataset/shuffle-word-1375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2135 max words, 200 samples - at ../dataset/gen-word-2135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 650 max words, 200 samples - at ../dataset/gen-word-650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1410 max words - at ../dataset/shuffle-word-1410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 765 max words, 200 samples - at ../dataset/gen-word-765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 625 max words, 200 samples - at ../dataset/gen-word-625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1820 max words, 200 samples - at ../dataset/gen-word-1820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1780 max words - at ../dataset/shuffle-word-1780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2060 max words, 200 samples - at ../dataset/gen-word-2060-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1670 max words - at ../dataset/shuffle-word-1670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 51 samples (20 token repeat) - 1285 max words - at ../dataset/shuffle-word-1285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 47 samples (20 token repeat) - 1270 max words - at ../dataset/shuffle-word-1270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1495 max words - at ../dataset/shuffle-word-1495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 670 max words, 200 samples - at ../dataset/gen-word-670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1680 max words - at ../dataset/shuffle-word-1680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1645 max words - at ../dataset/shuffle-word-1645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2495 max words, 200 samples - at ../dataset/gen-word-2495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1730 max words - at ../dataset/shuffle-word-1730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 645 max words, 200 samples - at ../dataset/gen-word-645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1510 max words - at ../dataset/shuffle-word-1510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1800 max words - at ../dataset/shuffle-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1740 max words - at ../dataset/shuffle-word-1740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1550 max words - at ../dataset/shuffle-word-1550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 75 max words, 100 samples - at ../dataset/gen-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1470 max words - at ../dataset/shuffle-word-1470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1405 max words - at ../dataset/shuffle-word-1405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1685 max words - at ../dataset/shuffle-word-1685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1930 max words - at ../dataset/shuffle-word-1930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1350 max words - at ../dataset/shuffle-word-1350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1490 max words - at ../dataset/shuffle-word-1490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 610 max words, 200 samples - at ../dataset/gen-word-610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1690 max words - at ../dataset/shuffle-word-1690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1850 max words - at ../dataset/shuffle-word-1850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1630 max words - at ../dataset/shuffle-word-1630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 685 max words, 200 samples - at ../dataset/gen-word-685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 41 samples (20 token repeat) - 1385 max words - at ../dataset/shuffle-word-1385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1855 max words - at ../dataset/shuffle-word-1855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1530 max words - at ../dataset/shuffle-word-1530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1425 max words - at ../dataset/shuffle-word-1425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1475 max words - at ../dataset/shuffle-word-1475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 53 samples (20 token repeat) - 1250 max words - at ../dataset/shuffle-word-1250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 965 max words, 200 samples - at ../dataset/gen-word-965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1600 max words - at ../dataset/shuffle-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2520 max words, 200 samples - at ../dataset/gen-word-2520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1900 max words - at ../dataset/shuffle-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1745 max words - at ../dataset/shuffle-word-1745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1010 max words, 200 samples - at ../dataset/gen-word-1010-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1320 max words, 200 samples - at ../dataset/gen-word-1320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1145 max words, 200 samples - at ../dataset/gen-word-1145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 51 samples (20 token repeat) - 1280 max words - at ../dataset/shuffle-word-1280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1315 max words - at ../dataset/shuffle-word-1315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 595 max words, 200 samples - at ../dataset/gen-word-595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1945 max words - at ../dataset/shuffle-word-1945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1395 max words - at ../dataset/shuffle-word-1395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1990 max words - at ../dataset/shuffle-word-1990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1950 max words - at ../dataset/shuffle-word-1950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1420 max words - at ../dataset/shuffle-word-1420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1380 max words - at ../dataset/shuffle-word-1380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 750 max words, 200 samples - at ../dataset/gen-word-750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2080 max words, 200 samples - at ../dataset/gen-word-2080-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 820 max words, 200 samples - at ../dataset/gen-word-820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1795 max words - at ../dataset/shuffle-word-1795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1545 max words - at ../dataset/shuffle-word-1545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1520 max words - at ../dataset/shuffle-word-1520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1835 max words - at ../dataset/shuffle-word-1835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 530 max words, 200 samples - at ../dataset/gen-word-530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 580 max words, 200 samples - at ../dataset/gen-word-580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1400 max words - at ../dataset/shuffle-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 41 samples (20 token repeat) - 1370 max words - at ../dataset/shuffle-word-1370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2700 max words, 200 samples - at ../dataset/gen-word-2700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1105 max words, 200 samples - at ../dataset/gen-word-1105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1920 max words - at ../dataset/shuffle-word-1920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1890 max words - at ../dataset/shuffle-word-1890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2630 max words, 200 samples - at ../dataset/gen-word-2630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1560 max words - at ../dataset/shuffle-word-1560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1905 max words - at ../dataset/shuffle-word-1905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 990 max words, 200 samples - at ../dataset/gen-word-990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2075 max words - at ../dataset/shuffle-word-2075-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1770 max words - at ../dataset/shuffle-word-1770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1115 max words, 200 samples - at ../dataset/gen-word-1115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1790 max words - at ../dataset/shuffle-word-1790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2070 max words, 200 samples - at ../dataset/gen-word-2070-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 50 samples (20 token repeat) - 1245 max words - at ../dataset/shuffle-word-1245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 975 max words, 200 samples - at ../dataset/gen-word-975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1870 max words - at ../dataset/shuffle-word-1870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1845 max words - at ../dataset/shuffle-word-1845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2660 max words, 200 samples - at ../dataset/gen-word-2660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1905 max words, 200 samples - at ../dataset/gen-word-1905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2145 max words - at ../dataset/shuffle-word-2145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1060 max words, 200 samples - at ../dataset/gen-word-1060-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1640 max words - at ../dataset/shuffle-word-1640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1785 max words, 200 samples - at ../dataset/gen-word-1785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 900 max words, 200 samples - at ../dataset/gen-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1755 max words - at ../dataset/shuffle-word-1755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 600 max words, 200 samples - at ../dataset/gen-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1915 max words - at ../dataset/shuffle-word-1915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 39 samples (20 token repeat) - 2305 max words - at ../dataset/shuffle-word-2305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1940 max words, 200 samples - at ../dataset/gen-word-1940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1635 max words - at ../dataset/shuffle-word-1635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 42 samples (20 token repeat) - 1360 max words - at ../dataset/shuffle-word-1360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1065 max words, 200 samples - at ../dataset/gen-word-1065-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1485 max words - at ../dataset/shuffle-word-1485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1085 max words, 200 samples - at ../dataset/gen-word-1085-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2260 max words - at ../dataset/shuffle-word-2260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 38 samples (20 token repeat) - 2420 max words - at ../dataset/shuffle-word-2420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 935 max words, 200 samples - at ../dataset/gen-word-935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 36 samples (20 token repeat) - 2425 max words - at ../dataset/shuffle-word-2425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1820 max words - at ../dataset/shuffle-word-1820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2430 max words, 200 samples - at ../dataset/gen-word-2430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1785 max words - at ../dataset/shuffle-word-1785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1750 max words - at ../dataset/shuffle-word-1750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3445 max words, 200 samples - at ../dataset/gen-word-3445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 945 max words, 200 samples - at ../dataset/gen-word-945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1565 max words - at ../dataset/shuffle-word-1565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 560 max words, 200 samples - at ../dataset/gen-word-560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1735 max words - at ../dataset/shuffle-word-1735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1805 max words - at ../dataset/shuffle-word-1805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2110 max words, 200 samples - at ../dataset/gen-word-2110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2160 max words - at ../dataset/shuffle-word-2160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2965 max words, 200 samples - at ../dataset/gen-word-2965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2020 max words, 200 samples - at ../dataset/gen-word-2020-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2165 max words - at ../dataset/shuffle-word-2165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2355 max words - at ../dataset/shuffle-word-2355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2130 max words, 200 samples - at ../dataset/gen-word-2130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1070 max words, 200 samples - at ../dataset/gen-word-1070-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 440 max words, 100 samples - at ../dataset/gen-word-440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3100 max words, 200 samples - at ../dataset/gen-word-3100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2130 max words - at ../dataset/shuffle-word-2130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 690 max words - at ../dataset/shuffle-word-690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 35 samples (20 token repeat) - 2440 max words - at ../dataset/shuffle-word-2440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 695 max words, 200 samples - at ../dataset/gen-word-695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2050 max words - at ../dataset/shuffle-word-2050-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2375 max words, 200 samples - at ../dataset/gen-word-2375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1215 max words, 200 samples - at ../dataset/gen-word-1215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 34 samples (20 token repeat) - 2450 max words - at ../dataset/shuffle-word-2450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1795 max words, 200 samples - at ../dataset/gen-word-1795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1760 max words, 200 samples - at ../dataset/gen-word-1760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1895 max words - at ../dataset/shuffle-word-1895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 760 max words, 200 samples - at ../dataset/gen-word-760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2290 max words, 200 samples - at ../dataset/gen-word-2290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2105 max words - at ../dataset/shuffle-word-2105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2255 max words - at ../dataset/shuffle-word-2255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1135 max words, 200 samples - at ../dataset/gen-word-1135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1925 max words - at ../dataset/shuffle-word-1925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1140 max words, 200 samples - at ../dataset/gen-word-1140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2035 max words - at ../dataset/shuffle-word-2035-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1275 max words, 200 samples - at ../dataset/gen-word-1275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 38 samples (20 token repeat) - 2460 max words - at ../dataset/shuffle-word-2460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 920 max words, 200 samples - at ../dataset/gen-word-920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2235 max words - at ../dataset/shuffle-word-2235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1035 max words, 200 samples - at ../dataset/gen-word-1035-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 905 max words, 200 samples - at ../dataset/gen-word-905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2575 max words, 200 samples - at ../dataset/gen-word-2575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2210 max words - at ../dataset/shuffle-word-2210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 30 samples (20 token repeat) - 2525 max words - at ../dataset/shuffle-word-2525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1055 max words, 200 samples - at ../dataset/gen-word-1055-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1750 max words, 200 samples - at ../dataset/gen-word-1750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2055 max words - at ../dataset/shuffle-word-2055-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1175 max words, 200 samples - at ../dataset/gen-word-1175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2175 max words - at ../dataset/shuffle-word-2175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2240 max words - at ../dataset/shuffle-word-2240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1005 max words, 200 samples - at ../dataset/gen-word-1005-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2000 max words, 200 samples - at ../dataset/gen-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2470 max words, 200 samples - at ../dataset/gen-word-2470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 32 samples (20 token repeat) - 2595 max words - at ../dataset/shuffle-word-2595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 29 samples (20 token repeat) - 2545 max words - at ../dataset/shuffle-word-2545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 34 samples (20 token repeat) - 2445 max words - at ../dataset/shuffle-word-2445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1675 max words, 200 samples - at ../dataset/gen-word-1675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1710 max words - at ../dataset/shuffle-word-1710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1205 max words, 200 samples - at ../dataset/gen-word-1205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1050 max words, 200 samples - at ../dataset/gen-word-1050-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1095 max words, 200 samples - at ../dataset/gen-word-1095-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 955 max words, 200 samples - at ../dataset/gen-word-955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2760 max words, 200 samples - at ../dataset/gen-word-2760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 930 max words, 200 samples - at ../dataset/gen-word-930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3385 max words, 200 samples - at ../dataset/gen-word-3385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2590 max words, 200 samples - at ../dataset/gen-word-2590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1185 max words, 200 samples - at ../dataset/gen-word-1185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2325 max words, 200 samples - at ../dataset/gen-word-2325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2120 max words, 200 samples - at ../dataset/gen-word-2120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2670 max words, 200 samples - at ../dataset/gen-word-2670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 27 samples (20 token repeat) - 2650 max words - at ../dataset/shuffle-word-2650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2195 max words, 200 samples - at ../dataset/gen-word-2195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3150 max words, 200 samples - at ../dataset/gen-word-3150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 26 samples (20 token repeat) - 2625 max words - at ../dataset/shuffle-word-2625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1675 max words - at ../dataset/shuffle-word-1675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1440 max words - at ../dataset/shuffle-word-1440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 28 samples (20 token repeat) - 2555 max words - at ../dataset/shuffle-word-2555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1195 max words, 200 samples - at ../dataset/gen-word-1195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3215 max words, 200 samples - at ../dataset/gen-word-3215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2395 max words - at ../dataset/shuffle-word-2395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1965 max words - at ../dataset/shuffle-word-1965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2625 max words, 200 samples - at ../dataset/gen-word-2625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3155 max words, 200 samples - at ../dataset/gen-word-3155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2280 max words, 200 samples - at ../dataset/gen-word-2280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2215 max words, 200 samples - at ../dataset/gen-word-2215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1340 max words, 200 samples - at ../dataset/gen-word-1340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1110 max words, 200 samples - at ../dataset/gen-word-1110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2025 max words, 200 samples - at ../dataset/gen-word-2025-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1710 max words, 200 samples - at ../dataset/gen-word-1710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 38 samples (20 token repeat) - 2470 max words - at ../dataset/shuffle-word-2470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 985 max words, 200 samples - at ../dataset/gen-word-985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2220 max words - at ../dataset/shuffle-word-2220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1845 max words, 200 samples - at ../dataset/gen-word-1845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2110 max words - at ../dataset/shuffle-word-2110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2165 max words, 200 samples - at ../dataset/gen-word-2165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 24 samples (20 token repeat) - 2685 max words - at ../dataset/shuffle-word-2685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 34 samples (20 token repeat) - 2585 max words - at ../dataset/shuffle-word-2585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 36 samples (20 token repeat) - 2455 max words - at ../dataset/shuffle-word-2455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2185 max words - at ../dataset/shuffle-word-2185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 36 samples (20 token repeat) - 2415 max words - at ../dataset/shuffle-word-2415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2315 max words - at ../dataset/shuffle-word-2315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2810 max words - at ../dataset/shuffle-word-2810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2265 max words, 200 samples - at ../dataset/gen-word-2265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 23 samples (20 token repeat) - 2645 max words - at ../dataset/shuffle-word-2645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2020 max words - at ../dataset/shuffle-word-2020-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2050 max words, 200 samples - at ../dataset/gen-word-2050-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1625 max words - at ../dataset/shuffle-word-1625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1485 max words, 200 samples - at ../dataset/gen-word-1485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1535 max words, 200 samples - at ../dataset/gen-word-1535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1760 max words - at ../dataset/shuffle-word-1760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3115 max words - at ../dataset/shuffle-word-3115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2225 max words - at ../dataset/shuffle-word-2225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1355 max words, 200 samples - at ../dataset/gen-word-1355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2600 max words, 200 samples - at ../dataset/gen-word-2600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1525 max words, 200 samples - at ../dataset/gen-word-1525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2390 max words - at ../dataset/shuffle-word-2390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1370 max words, 200 samples - at ../dataset/gen-word-1370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2180 max words - at ../dataset/shuffle-word-2180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2295 max words - at ../dataset/shuffle-word-2295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1650 max words, 200 samples - at ../dataset/gen-word-1650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1045 max words, 200 samples - at ../dataset/gen-word-1045-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2865 max words, 200 samples - at ../dataset/gen-word-2865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 28 samples (20 token repeat) - 2510 max words - at ../dataset/shuffle-word-2510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 23 samples (20 token repeat) - 2670 max words - at ../dataset/shuffle-word-2670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2215 max words - at ../dataset/shuffle-word-2215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1160 max words, 200 samples - at ../dataset/gen-word-1160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2525 max words, 200 samples - at ../dataset/gen-word-2525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2005 max words - at ../dataset/shuffle-word-2005-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 28 samples (20 token repeat) - 2550 max words - at ../dataset/shuffle-word-2550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 36 samples (20 token repeat) - 2480 max words - at ../dataset/shuffle-word-2480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 39 samples (20 token repeat) - 2400 max words - at ../dataset/shuffle-word-2400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2300 max words - at ../dataset/shuffle-word-2300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2040 max words - at ../dataset/shuffle-word-2040-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2010 max words - at ../dataset/shuffle-word-2010-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3160 max words, 200 samples - at ../dataset/gen-word-3160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1595 max words, 200 samples - at ../dataset/gen-word-1595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3250 max words, 200 samples - at ../dataset/gen-word-3250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1645 max words, 200 samples - at ../dataset/gen-word-1645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2025 max words - at ../dataset/shuffle-word-2025-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2385 max words - at ../dataset/shuffle-word-2385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 38 samples (20 token repeat) - 2350 max words - at ../dataset/shuffle-word-2350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3520 max words, 200 samples - at ../dataset/gen-word-3520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3415 max words - at ../dataset/shuffle-word-3415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2825 max words - at ../dataset/shuffle-word-2825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1585 max words, 200 samples - at ../dataset/gen-word-1585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2870 max words - at ../dataset/shuffle-word-2870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2650 max words, 200 samples - at ../dataset/gen-word-2650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1455 max words - at ../dataset/shuffle-word-1455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2070 max words - at ../dataset/shuffle-word-2070-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2150 max words - at ../dataset/shuffle-word-2150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 36 samples (20 token repeat) - 2435 max words - at ../dataset/shuffle-word-2435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2560 max words, 200 samples - at ../dataset/gen-word-2560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3450 max words, 200 samples - at ../dataset/gen-word-3450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 28 samples (20 token repeat) - 2505 max words - at ../dataset/shuffle-word-2505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1310 max words, 200 samples - at ../dataset/gen-word-1310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1470 max words, 200 samples - at ../dataset/gen-word-1470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2250 max words - at ../dataset/shuffle-word-2250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2785 max words - at ../dataset/shuffle-word-2785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3555 max words, 200 samples - at ../dataset/gen-word-3555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3050 max words - at ../dataset/shuffle-word-3050-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1120 max words, 200 samples - at ../dataset/gen-word-1120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3540 max words, 200 samples - at ../dataset/gen-word-3540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3145 max words, 200 samples - at ../dataset/gen-word-3145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1490 max words, 200 samples - at ../dataset/gen-word-1490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2755 max words - at ../dataset/shuffle-word-2755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 22 samples (20 token repeat) - 2640 max words - at ../dataset/shuffle-word-2640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 24 samples (20 token repeat) - 2655 max words - at ../dataset/shuffle-word-2655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1365 max words, 200 samples - at ../dataset/gen-word-1365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1450 max words, 200 samples - at ../dataset/gen-word-1450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2235 max words, 200 samples - at ../dataset/gen-word-2235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2280 max words - at ../dataset/shuffle-word-2280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3030 max words - at ../dataset/shuffle-word-3030-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3760 max words, 200 samples - at ../dataset/gen-word-3760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3975 max words, 200 samples - at ../dataset/gen-word-3975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2530 max words, 200 samples - at ../dataset/gen-word-2530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2720 max words, 200 samples - at ../dataset/gen-word-2720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2370 max words - at ../dataset/shuffle-word-2370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3545 max words, 200 samples - at ../dataset/gen-word-3545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2395 max words, 200 samples - at ../dataset/gen-word-2395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1500 max words, 200 samples - at ../dataset/gen-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2840 max words, 200 samples - at ../dataset/gen-word-2840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2085 max words - at ../dataset/shuffle-word-2085-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1705 max words, 200 samples - at ../dataset/gen-word-1705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1910 max words - at ../dataset/shuffle-word-1910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2795 max words - at ../dataset/shuffle-word-2795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2095 max words - at ../dataset/shuffle-word-2095-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2805 max words - at ../dataset/shuffle-word-2805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3525 max words - at ../dataset/shuffle-word-3525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2245 max words - at ../dataset/shuffle-word-2245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3680 max words, 200 samples - at ../dataset/gen-word-3680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2815 max words - at ../dataset/shuffle-word-2815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1480 max words, 200 samples - at ../dataset/gen-word-1480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2265 max words - at ../dataset/shuffle-word-2265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1540 max words, 200 samples - at ../dataset/gen-word-1540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2060 max words - at ../dataset/shuffle-word-2060-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1540 max words - at ../dataset/shuffle-word-1540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2015 max words - at ../dataset/shuffle-word-2015-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3350 max words - at ../dataset/shuffle-word-3350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3420 max words, 200 samples - at ../dataset/gen-word-3420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2170 max words - at ../dataset/shuffle-word-2170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2890 max words - at ../dataset/shuffle-word-2890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 37 samples (20 token repeat) - 2410 max words - at ../dataset/shuffle-word-2410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3560 max words, 200 samples - at ../dataset/gen-word-3560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3275 max words - at ../dataset/shuffle-word-3275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2790 max words, 200 samples - at ../dataset/gen-word-2790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1440 max words, 200 samples - at ../dataset/gen-word-1440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2620 max words, 200 samples - at ../dataset/gen-word-2620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2990 max words - at ../dataset/shuffle-word-2990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3125 max words, 200 samples - at ../dataset/gen-word-3125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 36 samples (20 token repeat) - 2465 max words - at ../dataset/shuffle-word-2465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1960 max words - at ../dataset/shuffle-word-1960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1330 max words, 200 samples - at ../dataset/gen-word-1330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2550 max words, 200 samples - at ../dataset/gen-word-2550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3290 max words, 200 samples - at ../dataset/gen-word-3290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 22 samples (20 token repeat) - 2700 max words - at ../dataset/shuffle-word-2700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2555 max words, 200 samples - at ../dataset/gen-word-2555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3265 max words, 200 samples - at ../dataset/gen-word-3265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1840 max words, 200 samples - at ../dataset/gen-word-1840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2945 max words, 200 samples - at ../dataset/gen-word-2945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 22 samples (20 token repeat) - 2720 max words - at ../dataset/shuffle-word-2720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2840 max words - at ../dataset/shuffle-word-2840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2400 max words, 200 samples - at ../dataset/gen-word-2400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2960 max words - at ../dataset/shuffle-word-2960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2730 max words, 200 samples - at ../dataset/gen-word-2730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2780 max words, 200 samples - at ../dataset/gen-word-2780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1315 max words, 200 samples - at ../dataset/gen-word-1315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 21 samples (20 token repeat) - 2705 max words - at ../dataset/shuffle-word-2705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1925 max words, 200 samples - at ../dataset/gen-word-1925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2800 max words - at ../dataset/shuffle-word-2800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1685 max words, 200 samples - at ../dataset/gen-word-1685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1875 max words, 200 samples - at ../dataset/gen-word-1875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3435 max words, 200 samples - at ../dataset/gen-word-3435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1690 max words, 200 samples - at ../dataset/gen-word-1690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1325 max words, 200 samples - at ../dataset/gen-word-1325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3195 max words, 200 samples - at ../dataset/gen-word-3195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 21 samples (20 token repeat) - 2760 max words - at ../dataset/shuffle-word-2760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1560 max words, 200 samples - at ../dataset/gen-word-1560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1880 max words, 200 samples - at ../dataset/gen-word-1880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3220 max words - at ../dataset/shuffle-word-3220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1555 max words, 200 samples - at ../dataset/gen-word-1555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1295 max words, 200 samples - at ../dataset/gen-word-1295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3395 max words - at ../dataset/shuffle-word-3395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1495 max words, 200 samples - at ../dataset/gen-word-1495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2340 max words, 200 samples - at ../dataset/gen-word-2340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1350 max words, 200 samples - at ../dataset/gen-word-1350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1580 max words, 200 samples - at ../dataset/gen-word-1580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3605 max words, 200 samples - at ../dataset/gen-word-3605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2790 max words - at ../dataset/shuffle-word-2790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3305 max words - at ../dataset/shuffle-word-3305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3055 max words - at ../dataset/shuffle-word-3055-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3375 max words - at ../dataset/shuffle-word-3375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3260 max words - at ../dataset/shuffle-word-3260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1865 max words, 200 samples - at ../dataset/gen-word-1865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3095 max words, 200 samples - at ../dataset/gen-word-3095-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2860 max words - at ../dataset/shuffle-word-2860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1915 max words, 200 samples - at ../dataset/gen-word-1915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1475 max words, 200 samples - at ../dataset/gen-word-1475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2885 max words - at ../dataset/shuffle-word-2885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3320 max words - at ../dataset/shuffle-word-3320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1390 max words, 200 samples - at ../dataset/gen-word-1390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2945 max words - at ../dataset/shuffle-word-2945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3290 max words - at ../dataset/shuffle-word-3290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1420 max words, 200 samples - at ../dataset/gen-word-1420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3310 max words - at ../dataset/shuffle-word-3310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3120 max words - at ../dataset/shuffle-word-3120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3075 max words, 200 samples - at ../dataset/gen-word-3075-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3260 max words, 200 samples - at ../dataset/gen-word-3260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3215 max words - at ../dataset/shuffle-word-3215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3300 max words - at ../dataset/shuffle-word-3300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 22 samples (20 token repeat) - 2715 max words - at ../dataset/shuffle-word-2715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3085 max words - at ../dataset/shuffle-word-3085-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3135 max words - at ../dataset/shuffle-word-3135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2825 max words, 200 samples - at ../dataset/gen-word-2825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 33 samples (20 token repeat) - 2540 max words - at ../dataset/shuffle-word-2540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3210 max words - at ../dataset/shuffle-word-3210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3475 max words - at ../dataset/shuffle-word-3475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3160 max words - at ../dataset/shuffle-word-3160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3005 max words - at ../dataset/shuffle-word-3005-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2510 max words, 200 samples - at ../dataset/gen-word-2510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3410 max words - at ../dataset/shuffle-word-3410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3235 max words - at ../dataset/shuffle-word-3235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3740 max words, 200 samples - at ../dataset/gen-word-3740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3590 max words, 200 samples - at ../dataset/gen-word-3590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3330 max words - at ../dataset/shuffle-word-3330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3250 max words - at ../dataset/shuffle-word-3250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1395 max words, 200 samples - at ../dataset/gen-word-1395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1830 max words, 200 samples - at ../dataset/gen-word-1830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3180 max words - at ../dataset/shuffle-word-3180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2930 max words - at ../dataset/shuffle-word-2930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3670 max words, 200 samples - at ../dataset/gen-word-3670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3985 max words, 200 samples - at ../dataset/gen-word-3985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2995 max words, 200 samples - at ../dataset/gen-word-2995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2310 max words, 200 samples - at ../dataset/gen-word-2310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1550 max words, 200 samples - at ../dataset/gen-word-1550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3780 max words, 200 samples - at ../dataset/gen-word-3780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1515 max words, 200 samples - at ../dataset/gen-word-1515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3435 max words - at ../dataset/shuffle-word-3435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3585 max words - at ../dataset/shuffle-word-3585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3420 max words - at ../dataset/shuffle-word-3420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1335 max words, 200 samples - at ../dataset/gen-word-1335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3345 max words - at ../dataset/shuffle-word-3345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1410 max words, 200 samples - at ../dataset/gen-word-1410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1755 max words, 200 samples - at ../dataset/gen-word-1755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3195 max words - at ../dataset/shuffle-word-3195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2935 max words, 200 samples - at ../dataset/gen-word-2935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3380 max words - at ../dataset/shuffle-word-3380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3145 max words - at ../dataset/shuffle-word-3145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 21 samples (20 token repeat) - 2750 max words - at ../dataset/shuffle-word-2750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1740 max words, 200 samples - at ../dataset/gen-word-1740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1680 max words, 200 samples - at ../dataset/gen-word-1680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1725 max words, 200 samples - at ../dataset/gen-word-1725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3640 max words, 200 samples - at ../dataset/gen-word-3640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2515 max words, 200 samples - at ../dataset/gen-word-2515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1790 max words, 200 samples - at ../dataset/gen-word-1790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3065 max words - at ../dataset/shuffle-word-3065-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3225 max words - at ../dataset/shuffle-word-3225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1605 max words, 200 samples - at ../dataset/gen-word-1605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3130 max words - at ../dataset/shuffle-word-3130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3365 max words, 200 samples - at ../dataset/gen-word-3365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2680 max words, 200 samples - at ../dataset/gen-word-2680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3270 max words - at ../dataset/shuffle-word-3270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3440 max words - at ../dataset/shuffle-word-3440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2885 max words, 200 samples - at ../dataset/gen-word-2885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1720 max words, 200 samples - at ../dataset/gen-word-1720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3020 max words - at ../dataset/shuffle-word-3020-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1780 max words, 200 samples - at ../dataset/gen-word-1780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2710 max words, 200 samples - at ../dataset/gen-word-2710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3055 max words, 200 samples - at ../dataset/gen-word-3055-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1955 max words, 200 samples - at ../dataset/gen-word-1955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3295 max words, 200 samples - at ../dataset/gen-word-3295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2095 max words, 200 samples - at ../dataset/gen-word-2095-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3165 max words - at ../dataset/shuffle-word-3165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3150 max words - at ../dataset/shuffle-word-3150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1855 max words, 200 samples - at ../dataset/gen-word-1855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3885 max words - at ../dataset/shuffle-word-3885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2300 max words, 200 samples - at ../dataset/gen-word-2300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3990 max words, 200 samples - at ../dataset/gen-word-3990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1655 max words, 200 samples - at ../dataset/gen-word-1655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2900 max words, 200 samples - at ../dataset/gen-word-2900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2435 max words, 200 samples - at ../dataset/gen-word-2435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3890 max words - at ../dataset/shuffle-word-3890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1600 max words, 200 samples - at ../dataset/gen-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3695 max words - at ../dataset/shuffle-word-3695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1520 max words, 200 samples - at ../dataset/gen-word-1520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3770 max words, 200 samples - at ../dataset/gen-word-3770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3030 max words, 200 samples - at ../dataset/gen-word-3030-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3625 max words - at ../dataset/shuffle-word-3625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3330 max words, 200 samples - at ../dataset/gen-word-3330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3015 max words, 200 samples - at ../dataset/gen-word-3015-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2040 max words, 200 samples - at ../dataset/gen-word-2040-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3585 max words, 200 samples - at ../dataset/gen-word-3585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1695 max words, 200 samples - at ../dataset/gen-word-1695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2655 max words, 200 samples - at ../dataset/gen-word-2655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3745 max words - at ../dataset/shuffle-word-3745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3390 max words - at ../dataset/shuffle-word-3390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2995 max words - at ../dataset/shuffle-word-2995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1510 max words, 200 samples - at ../dataset/gen-word-1510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1575 max words, 200 samples - at ../dataset/gen-word-1575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3615 max words, 200 samples - at ../dataset/gen-word-3615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3185 max words - at ../dataset/shuffle-word-3185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3285 max words, 200 samples - at ../dataset/gen-word-3285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3725 max words - at ../dataset/shuffle-word-3725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3845 max words - at ../dataset/shuffle-word-3845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3235 max words, 200 samples - at ../dataset/gen-word-3235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1435 max words, 200 samples - at ../dataset/gen-word-1435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3640 max words - at ../dataset/shuffle-word-3640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1825 max words, 200 samples - at ../dataset/gen-word-1825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2935 max words - at ../dataset/shuffle-word-2935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3960 max words - at ../dataset/shuffle-word-3960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3815 max words - at ../dataset/shuffle-word-3815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2715 max words, 200 samples - at ../dataset/gen-word-2715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3650 max words - at ../dataset/shuffle-word-3650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3655 max words, 200 samples - at ../dataset/gen-word-3655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1805 max words, 200 samples - at ../dataset/gen-word-1805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2845 max words, 200 samples - at ../dataset/gen-word-2845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3200 max words, 200 samples - at ../dataset/gen-word-3200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1625 max words, 200 samples - at ../dataset/gen-word-1625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3665 max words - at ../dataset/shuffle-word-3665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1700 max words, 200 samples - at ../dataset/gen-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3750 max words - at ../dataset/shuffle-word-3750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3930 max words - at ../dataset/shuffle-word-3930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3685 max words - at ../dataset/shuffle-word-3685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3670 max words - at ../dataset/shuffle-word-3670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3740 max words - at ../dataset/shuffle-word-3740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3855 max words - at ../dataset/shuffle-word-3855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3355 max words - at ../dataset/shuffle-word-3355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3985 max words - at ../dataset/shuffle-word-3985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1930 max words, 200 samples - at ../dataset/gen-word-1930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3550 max words - at ../dataset/shuffle-word-3550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3095 max words - at ../dataset/shuffle-word-3095-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3775 max words - at ../dataset/shuffle-word-3775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3240 max words, 200 samples - at ../dataset/gen-word-3240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1920 max words, 200 samples - at ../dataset/gen-word-1920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1405 max words, 200 samples - at ../dataset/gen-word-1405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1730 max words, 200 samples - at ../dataset/gen-word-1730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2370 max words, 200 samples - at ../dataset/gen-word-2370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3980 max words - at ../dataset/shuffle-word-3980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2810 max words, 200 samples - at ../dataset/gen-word-2810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3805 max words, 200 samples - at ../dataset/gen-word-3805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3735 max words, 200 samples - at ../dataset/gen-word-3735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2185 max words, 200 samples - at ../dataset/gen-word-2185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3755 max words, 200 samples - at ../dataset/gen-word-3755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3140 max words - at ../dataset/shuffle-word-3140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3825 max words - at ../dataset/shuffle-word-3825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4000 max words - at ../dataset/shuffle-word-4000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3335 max words, 200 samples - at ../dataset/gen-word-3335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3560 max words - at ../dataset/shuffle-word-3560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3405 max words, 200 samples - at ../dataset/gen-word-3405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3905 max words - at ../dataset/shuffle-word-3905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2315 max words, 200 samples - at ../dataset/gen-word-2315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1815 max words, 200 samples - at ../dataset/gen-word-1815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2320 max words, 200 samples - at ../dataset/gen-word-2320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2480 max words, 200 samples - at ../dataset/gen-word-2480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3555 max words - at ../dataset/shuffle-word-3555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1415 max words, 200 samples - at ../dataset/gen-word-1415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3865 max words - at ../dataset/shuffle-word-3865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3870 max words - at ../dataset/shuffle-word-3870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1565 max words, 200 samples - at ../dataset/gen-word-1565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3715 max words - at ../dataset/shuffle-word-3715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2485 max words, 200 samples - at ../dataset/gen-word-2485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3780 max words - at ../dataset/shuffle-word-3780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1970 max words, 200 samples - at ../dataset/gen-word-1970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3730 max words - at ../dataset/shuffle-word-3730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2180 max words, 200 samples - at ../dataset/gen-word-2180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1745 max words, 200 samples - at ../dataset/gen-word-1745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3545 max words - at ../dataset/shuffle-word-3545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3935 max words - at ../dataset/shuffle-word-3935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2065 max words, 200 samples - at ../dataset/gen-word-2065-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3835 max words - at ../dataset/shuffle-word-3835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3810 max words - at ../dataset/shuffle-word-3810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3875 max words - at ../dataset/shuffle-word-3875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3765 max words - at ../dataset/shuffle-word-3765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3325 max words, 200 samples - at ../dataset/gen-word-3325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3175 max words - at ../dataset/shuffle-word-3175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3735 max words - at ../dataset/shuffle-word-3735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3975 max words - at ../dataset/shuffle-word-3975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1505 max words, 200 samples - at ../dataset/gen-word-1505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2455 max words, 200 samples - at ../dataset/gen-word-2455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3685 max words, 200 samples - at ../dataset/gen-word-3685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3940 max words - at ../dataset/shuffle-word-3940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2415 max words, 200 samples - at ../dataset/gen-word-2415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3760 max words - at ../dataset/shuffle-word-3760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3915 max words - at ../dataset/shuffle-word-3915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3990 max words - at ../dataset/shuffle-word-3990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2930 max words, 200 samples - at ../dataset/gen-word-2930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3730 max words, 200 samples - at ../dataset/gen-word-3730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2830 max words, 200 samples - at ../dataset/gen-word-2830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3010 max words - at ../dataset/shuffle-word-3010-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1885 max words, 200 samples - at ../dataset/gen-word-1885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3850 max words - at ../dataset/shuffle-word-3850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1610 max words, 200 samples - at ../dataset/gen-word-1610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2260 max words, 200 samples - at ../dataset/gen-word-2260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3415 max words, 200 samples - at ../dataset/gen-word-3415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2035 max words, 200 samples - at ../dataset/gen-word-2035-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3230 max words, 200 samples - at ../dataset/gen-word-3230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3180 max words, 200 samples - at ../dataset/gen-word-3180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3490 max words, 200 samples - at ../dataset/gen-word-3490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3725 max words, 200 samples - at ../dataset/gen-word-3725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1835 max words, 200 samples - at ../dataset/gen-word-1835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3870 max words, 200 samples - at ../dataset/gen-word-3870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3460 max words, 200 samples - at ../dataset/gen-word-3460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2795 max words, 200 samples - at ../dataset/gen-word-2795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3540 max words - at ../dataset/shuffle-word-3540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2015 max words, 200 samples - at ../dataset/gen-word-2015-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3920 max words - at ../dataset/shuffle-word-3920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3515 max words - at ../dataset/shuffle-word-3515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3080 max words - at ../dataset/shuffle-word-3080-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2255 max words, 200 samples - at ../dataset/gen-word-2255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2645 max words, 200 samples - at ../dataset/gen-word-2645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2045 max words, 200 samples - at ../dataset/gen-word-2045-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3830 max words - at ../dataset/shuffle-word-3830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3800 max words - at ../dataset/shuffle-word-3800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3440 max words, 200 samples - at ../dataset/gen-word-3440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3880 max words - at ../dataset/shuffle-word-3880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3965 max words - at ../dataset/shuffle-word-3965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2345 max words, 200 samples - at ../dataset/gen-word-2345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2190 max words, 200 samples - at ../dataset/gen-word-2190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2385 max words, 200 samples - at ../dataset/gen-word-2385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2610 max words, 200 samples - at ../dataset/gen-word-2610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2055 max words, 200 samples - at ../dataset/gen-word-2055-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2445 max words, 200 samples - at ../dataset/gen-word-2445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2580 max words, 200 samples - at ../dataset/gen-word-2580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3770 max words - at ../dataset/shuffle-word-3770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2745 max words, 200 samples - at ../dataset/gen-word-2745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2565 max words, 200 samples - at ../dataset/gen-word-2565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2705 max words, 200 samples - at ../dataset/gen-word-2705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3720 max words, 200 samples - at ../dataset/gen-word-3720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2970 max words, 200 samples - at ../dataset/gen-word-2970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3465 max words, 200 samples - at ../dataset/gen-word-3465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2490 max words, 200 samples - at ../dataset/gen-word-2490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2765 max words, 200 samples - at ../dataset/gen-word-2765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2170 max words, 200 samples - at ../dataset/gen-word-2170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2030 max words, 200 samples - at ../dataset/gen-word-2030-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1615 max words, 200 samples - at ../dataset/gen-word-1615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3800 max words, 200 samples - at ../dataset/gen-word-3800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3360 max words, 200 samples - at ../dataset/gen-word-3360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3105 max words, 200 samples - at ../dataset/gen-word-3105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3485 max words, 200 samples - at ../dataset/gen-word-3485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2285 max words, 200 samples - at ../dataset/gen-word-2285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3340 max words, 200 samples - at ../dataset/gen-word-3340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2010 max words, 200 samples - at ../dataset/gen-word-2010-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3775 max words, 200 samples - at ../dataset/gen-word-3775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3050 max words, 200 samples - at ../dataset/gen-word-3050-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2895 max words, 200 samples - at ../dataset/gen-word-2895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1980 max words, 200 samples - at ../dataset/gen-word-1980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2475 max words, 200 samples - at ../dataset/gen-word-2475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2460 max words, 200 samples - at ../dataset/gen-word-2460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1775 max words, 200 samples - at ../dataset/gen-word-1775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2910 max words, 200 samples - at ../dataset/gen-word-2910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2860 max words, 200 samples - at ../dataset/gen-word-2860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3790 max words, 200 samples - at ../dataset/gen-word-3790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2330 max words, 200 samples - at ../dataset/gen-word-2330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2450 max words, 200 samples - at ../dataset/gen-word-2450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3120 max words, 200 samples - at ../dataset/gen-word-3120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2875 max words, 200 samples - at ../dataset/gen-word-2875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2005 max words, 200 samples - at ../dataset/gen-word-2005-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2360 max words, 200 samples - at ../dataset/gen-word-2360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2605 max words, 200 samples - at ../dataset/gen-word-2605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3660 max words, 200 samples - at ../dataset/gen-word-3660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2985 max words, 200 samples - at ../dataset/gen-word-2985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2905 max words, 200 samples - at ../dataset/gen-word-2905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2870 max words, 200 samples - at ../dataset/gen-word-2870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2155 max words, 200 samples - at ../dataset/gen-word-2155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2735 max words, 200 samples - at ../dataset/gen-word-2735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2205 max words, 200 samples - at ../dataset/gen-word-2205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1985 max words, 200 samples - at ../dataset/gen-word-1985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3650 max words, 200 samples - at ../dataset/gen-word-3650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2105 max words, 200 samples - at ../dataset/gen-word-2105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2815 max words, 200 samples - at ../dataset/gen-word-2815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2640 max words, 200 samples - at ../dataset/gen-word-2640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2800 max words, 200 samples - at ../dataset/gen-word-2800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2425 max words, 200 samples - at ../dataset/gen-word-2425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3595 max words, 200 samples - at ../dataset/gen-word-3595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3470 max words, 200 samples - at ../dataset/gen-word-3470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3840 max words, 200 samples - at ../dataset/gen-word-3840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2725 max words, 200 samples - at ../dataset/gen-word-2725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2440 max words, 200 samples - at ../dataset/gen-word-2440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2835 max words, 200 samples - at ../dataset/gen-word-2835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4000 max words, 200 samples - at ../dataset/gen-word-4000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2755 max words, 200 samples - at ../dataset/gen-word-2755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3515 max words, 200 samples - at ../dataset/gen-word-3515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2145 max words, 200 samples - at ../dataset/gen-word-2145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1995 max words, 200 samples - at ../dataset/gen-word-1995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2175 max words, 200 samples - at ../dataset/gen-word-2175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1965 max words, 200 samples - at ../dataset/gen-word-1965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2675 max words, 200 samples - at ../dataset/gen-word-2675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2535 max words, 200 samples - at ../dataset/gen-word-2535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3930 max words, 200 samples - at ../dataset/gen-word-3930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2890 max words, 200 samples - at ../dataset/gen-word-2890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2465 max words, 200 samples - at ../dataset/gen-word-2465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3455 max words, 200 samples - at ../dataset/gen-word-3455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3815 max words, 200 samples - at ../dataset/gen-word-3815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3935 max words, 200 samples - at ../dataset/gen-word-3935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3370 max words, 200 samples - at ../dataset/gen-word-3370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2950 max words, 200 samples - at ../dataset/gen-word-2950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2980 max words, 200 samples - at ../dataset/gen-word-2980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3040 max words, 200 samples - at ../dataset/gen-word-3040-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2220 max words, 200 samples - at ../dataset/gen-word-2220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3620 max words, 200 samples - at ../dataset/gen-word-3620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3995 max words, 200 samples - at ../dataset/gen-word-3995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2275 max words, 200 samples - at ../dataset/gen-word-2275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3300 max words, 200 samples - at ../dataset/gen-word-3300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2740 max words, 200 samples - at ../dataset/gen-word-2740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2990 max words, 200 samples - at ../dataset/gen-word-2990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2090 max words, 200 samples - at ../dataset/gen-word-2090-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2570 max words, 200 samples - at ../dataset/gen-word-2570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3880 max words, 200 samples - at ../dataset/gen-word-3880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2685 max words, 200 samples - at ../dataset/gen-word-2685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2750 max words, 200 samples - at ../dataset/gen-word-2750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2305 max words, 200 samples - at ../dataset/gen-word-2305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2350 max words, 200 samples - at ../dataset/gen-word-2350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3575 max words, 200 samples - at ../dataset/gen-word-3575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3305 max words, 200 samples - at ../dataset/gen-word-3305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2380 max words, 200 samples - at ../dataset/gen-word-2380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2540 max words, 200 samples - at ../dataset/gen-word-2540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2770 max words, 200 samples - at ../dataset/gen-word-2770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2665 max words, 200 samples - at ../dataset/gen-word-2665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2785 max words, 200 samples - at ../dataset/gen-word-2785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2585 max words, 200 samples - at ../dataset/gen-word-2585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3255 max words, 200 samples - at ../dataset/gen-word-3255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3130 max words, 200 samples - at ../dataset/gen-word-3130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3895 max words, 200 samples - at ../dataset/gen-word-3895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2975 max words, 200 samples - at ../dataset/gen-word-2975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3955 max words, 200 samples - at ../dataset/gen-word-3955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2595 max words, 200 samples - at ../dataset/gen-word-2595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3665 max words, 200 samples - at ../dataset/gen-word-3665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3945 max words, 200 samples - at ../dataset/gen-word-3945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3220 max words, 200 samples - at ../dataset/gen-word-3220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2925 max words, 200 samples - at ../dataset/gen-word-2925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3380 max words, 200 samples - at ../dataset/gen-word-3380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3185 max words, 200 samples - at ../dataset/gen-word-3185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2805 max words, 200 samples - at ../dataset/gen-word-2805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3085 max words, 200 samples - at ../dataset/gen-word-3085-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3070 max words, 200 samples - at ../dataset/gen-word-3070-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3965 max words, 200 samples - at ../dataset/gen-word-3965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2820 max words, 200 samples - at ../dataset/gen-word-2820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3205 max words, 200 samples - at ../dataset/gen-word-3205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3020 max words, 200 samples - at ../dataset/gen-word-3020-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2615 max words, 200 samples - at ../dataset/gen-word-2615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2915 max words, 200 samples - at ../dataset/gen-word-2915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2775 max words, 200 samples - at ../dataset/gen-word-2775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3835 max words, 200 samples - at ../dataset/gen-word-3835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3035 max words, 200 samples - at ../dataset/gen-word-3035-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2690 max words, 200 samples - at ../dataset/gen-word-2690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2940 max words, 200 samples - at ../dataset/gen-word-2940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2955 max words, 200 samples - at ../dataset/gen-word-2955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3395 max words, 200 samples - at ../dataset/gen-word-3395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3005 max words, 200 samples - at ../dataset/gen-word-3005-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2855 max words, 200 samples - at ../dataset/gen-word-2855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3165 max words, 200 samples - at ../dataset/gen-word-3165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3115 max words, 200 samples - at ../dataset/gen-word-3115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3065 max words, 200 samples - at ../dataset/gen-word-3065-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3610 max words, 200 samples - at ../dataset/gen-word-3610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2920 max words, 200 samples - at ../dataset/gen-word-2920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3425 max words, 200 samples - at ../dataset/gen-word-3425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3060 max words, 200 samples - at ../dataset/gen-word-3060-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3400 max words, 200 samples - at ../dataset/gen-word-3400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3025 max words, 200 samples - at ../dataset/gen-word-3025-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3190 max words, 200 samples - at ../dataset/gen-word-3190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3080 max words, 200 samples - at ../dataset/gen-word-3080-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3210 max words, 200 samples - at ../dataset/gen-word-3210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3865 max words, 200 samples - at ../dataset/gen-word-3865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3355 max words, 200 samples - at ../dataset/gen-word-3355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3000 max words, 200 samples - at ../dataset/gen-word-3000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3390 max words, 200 samples - at ../dataset/gen-word-3390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3855 max words, 200 samples - at ../dataset/gen-word-3855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3350 max words, 200 samples - at ../dataset/gen-word-3350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3140 max words, 200 samples - at ../dataset/gen-word-3140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3045 max words, 200 samples - at ../dataset/gen-word-3045-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3535 max words, 200 samples - at ../dataset/gen-word-3535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3135 max words, 200 samples - at ../dataset/gen-word-3135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3110 max words, 200 samples - at ../dataset/gen-word-3110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3510 max words, 200 samples - at ../dataset/gen-word-3510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3500 max words, 200 samples - at ../dataset/gen-word-3500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3860 max words, 200 samples - at ../dataset/gen-word-3860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3270 max words, 200 samples - at ../dataset/gen-word-3270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3525 max words, 200 samples - at ../dataset/gen-word-3525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3245 max words, 200 samples - at ../dataset/gen-word-3245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3630 max words, 200 samples - at ../dataset/gen-word-3630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3480 max words, 200 samples - at ../dataset/gen-word-3480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3410 max words, 200 samples - at ../dataset/gen-word-3410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3695 max words, 200 samples - at ../dataset/gen-word-3695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3625 max words, 200 samples - at ../dataset/gen-word-3625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3530 max words, 200 samples - at ../dataset/gen-word-3530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3925 max words, 200 samples - at ../dataset/gen-word-3925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3310 max words, 200 samples - at ../dataset/gen-word-3310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3905 max words, 200 samples - at ../dataset/gen-word-3905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3715 max words, 200 samples - at ../dataset/gen-word-3715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3475 max words, 200 samples - at ../dataset/gen-word-3475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3765 max words, 200 samples - at ../dataset/gen-word-3765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3375 max words, 200 samples - at ../dataset/gen-word-3375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3850 max words, 200 samples - at ../dataset/gen-word-3850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3175 max words, 200 samples - at ../dataset/gen-word-3175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3980 max words, 200 samples - at ../dataset/gen-word-3980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3820 max words, 200 samples - at ../dataset/gen-word-3820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3225 max words, 200 samples - at ../dataset/gen-word-3225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3345 max words, 200 samples - at ../dataset/gen-word-3345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3795 max words, 200 samples - at ../dataset/gen-word-3795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3315 max words, 200 samples - at ../dataset/gen-word-3315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3960 max words, 200 samples - at ../dataset/gen-word-3960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3430 max words, 200 samples - at ../dataset/gen-word-3430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3825 max words, 200 samples - at ../dataset/gen-word-3825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3570 max words, 200 samples - at ../dataset/gen-word-3570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3915 max words, 200 samples - at ../dataset/gen-word-3915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3505 max words, 200 samples - at ../dataset/gen-word-3505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3705 max words, 200 samples - at ../dataset/gen-word-3705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3890 max words, 200 samples - at ../dataset/gen-word-3890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3495 max words, 200 samples - at ../dataset/gen-word-3495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3675 max words, 200 samples - at ../dataset/gen-word-3675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3550 max words, 200 samples - at ../dataset/gen-word-3550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3170 max words, 200 samples - at ../dataset/gen-word-3170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3710 max words, 200 samples - at ../dataset/gen-word-3710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3600 max words, 200 samples - at ../dataset/gen-word-3600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3700 max words, 200 samples - at ../dataset/gen-word-3700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3635 max words, 200 samples - at ../dataset/gen-word-3635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3690 max words, 200 samples - at ../dataset/gen-word-3690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3750 max words, 200 samples - at ../dataset/gen-word-3750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3580 max words, 200 samples - at ../dataset/gen-word-3580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3645 max words, 200 samples - at ../dataset/gen-word-3645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3900 max words, 200 samples - at ../dataset/gen-word-3900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3785 max words, 200 samples - at ../dataset/gen-word-3785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3950 max words, 200 samples - at ../dataset/gen-word-3950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3940 max words, 200 samples - at ../dataset/gen-word-3940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3810 max words, 200 samples - at ../dataset/gen-word-3810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3910 max words, 200 samples - at ../dataset/gen-word-3910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3845 max words, 200 samples - at ../dataset/gen-word-3845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3830 max words, 200 samples - at ../dataset/gen-word-3830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3875 max words, 200 samples - at ../dataset/gen-word-3875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3920 max words, 200 samples - at ../dataset/gen-word-3920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3970 max words, 200 samples - at ../dataset/gen-word-3970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3885 max words, 200 samples - at ../dataset/gen-word-3885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3745 max words, 200 samples - at ../dataset/gen-word-3745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "## Done ##\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "total 3.4G\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "drwxr-xr-x  2 root root   72K Aug 24 02:35 .\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "drwxr-xr-x 11 root root   230 Aug 24 02:33 ..\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   19K Aug 24 02:35 gen-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  107K Aug 24 02:35 gen-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.0M Aug 24 02:35 gen-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.0M Aug 24 02:35 gen-word-1005-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.0M Aug 24 02:35 gen-word-1010-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.0M Aug 24 02:35 gen-word-1015-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.0M Aug 24 02:35 gen-word-1020-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.0M Aug 24 02:35 gen-word-1025-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.0M Aug 24 02:35 gen-word-1030-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.0M Aug 24 02:35 gen-word-1035-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.1M Aug 24 02:35 gen-word-1040-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.1M Aug 24 02:35 gen-word-1045-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  111K Aug 24 02:35 gen-word-105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.1M Aug 24 02:35 gen-word-1050-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.1M Aug 24 02:35 gen-word-1055-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.1M Aug 24 02:35 gen-word-1060-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.1M Aug 24 02:35 gen-word-1065-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.1M Aug 24 02:35 gen-word-1070-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.1M Aug 24 02:35 gen-word-1075-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.1M Aug 24 02:35 gen-word-1080-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.1M Aug 24 02:35 gen-word-1085-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.1M Aug 24 02:35 gen-word-1090-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.2M Aug 24 02:35 gen-word-1095-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  114K Aug 24 02:35 gen-word-110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.2M Aug 24 02:35 gen-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.2M Aug 24 02:35 gen-word-1105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.2M Aug 24 02:35 gen-word-1110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.2M Aug 24 02:35 gen-word-1115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.2M Aug 24 02:35 gen-word-1120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.2M Aug 24 02:35 gen-word-1125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.2M Aug 24 02:35 gen-word-1130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.2M Aug 24 02:35 gen-word-1135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.2M Aug 24 02:35 gen-word-1140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.3M Aug 24 02:35 gen-word-1145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  121K Aug 24 02:35 gen-word-115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.2M Aug 24 02:35 gen-word-1150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.3M Aug 24 02:35 gen-word-1155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.3M Aug 24 02:35 gen-word-1160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.3M Aug 24 02:35 gen-word-1165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.3M Aug 24 02:35 gen-word-1170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.3M Aug 24 02:35 gen-word-1175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.3M Aug 24 02:35 gen-word-1180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.3M Aug 24 02:35 gen-word-1185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.3M Aug 24 02:35 gen-word-1190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.3M Aug 24 02:35 gen-word-1195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  128K Aug 24 02:35 gen-word-120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.3M Aug 24 02:35 gen-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.4M Aug 24 02:35 gen-word-1205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.4M Aug 24 02:35 gen-word-1210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.4M Aug 24 02:35 gen-word-1215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.4M Aug 24 02:35 gen-word-1220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.4M Aug 24 02:35 gen-word-1225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.4M Aug 24 02:35 gen-word-1230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.4M Aug 24 02:35 gen-word-1235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.4M Aug 24 02:35 gen-word-1240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.4M Aug 24 02:35 gen-word-1245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  131K Aug 24 02:35 gen-word-125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.4M Aug 24 02:35 gen-word-1250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.5M Aug 24 02:35 gen-word-1255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.5M Aug 24 02:35 gen-word-1260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.5M Aug 24 02:35 gen-word-1265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.5M Aug 24 02:35 gen-word-1270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.5M Aug 24 02:35 gen-word-1275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.5M Aug 24 02:35 gen-word-1280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.5M Aug 24 02:35 gen-word-1285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.5M Aug 24 02:35 gen-word-1290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.5M Aug 24 02:35 gen-word-1295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  132K Aug 24 02:35 gen-word-130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.5M Aug 24 02:35 gen-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.5M Aug 24 02:35 gen-word-1305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.5M Aug 24 02:35 gen-word-1310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.6M Aug 24 02:35 gen-word-1315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.6M Aug 24 02:35 gen-word-1320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.6M Aug 24 02:35 gen-word-1325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.6M Aug 24 02:35 gen-word-1330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.6M Aug 24 02:35 gen-word-1335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.6M Aug 24 02:35 gen-word-1340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.6M Aug 24 02:35 gen-word-1345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  144K Aug 24 02:35 gen-word-135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.6M Aug 24 02:35 gen-word-1350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.6M Aug 24 02:35 gen-word-1355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.7M Aug 24 02:35 gen-word-1360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.7M Aug 24 02:35 gen-word-1365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.7M Aug 24 02:35 gen-word-1370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.7M Aug 24 02:35 gen-word-1375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.7M Aug 24 02:35 gen-word-1380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.7M Aug 24 02:35 gen-word-1385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.7M Aug 24 02:35 gen-word-1390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.7M Aug 24 02:35 gen-word-1395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  146K Aug 24 02:35 gen-word-140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.7M Aug 24 02:35 gen-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.8M Aug 24 02:35 gen-word-1405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.8M Aug 24 02:35 gen-word-1410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.8M Aug 24 02:35 gen-word-1415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.8M Aug 24 02:35 gen-word-1420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.8M Aug 24 02:35 gen-word-1425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.8M Aug 24 02:35 gen-word-1430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.8M Aug 24 02:35 gen-word-1435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.8M Aug 24 02:35 gen-word-1440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.8M Aug 24 02:35 gen-word-1445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  148K Aug 24 02:35 gen-word-145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.8M Aug 24 02:35 gen-word-1450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.8M Aug 24 02:35 gen-word-1455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.9M Aug 24 02:35 gen-word-1460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.9M Aug 24 02:35 gen-word-1465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.9M Aug 24 02:35 gen-word-1470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.9M Aug 24 02:35 gen-word-1475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.9M Aug 24 02:35 gen-word-1480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.9M Aug 24 02:35 gen-word-1485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.9M Aug 24 02:35 gen-word-1490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.9M Aug 24 02:35 gen-word-1495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   25K Aug 24 02:35 gen-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  152K Aug 24 02:35 gen-word-150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.9M Aug 24 02:35 gen-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.9M Aug 24 02:35 gen-word-1505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.9M Aug 24 02:35 gen-word-1510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.0M Aug 24 02:35 gen-word-1515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.9M Aug 24 02:35 gen-word-1520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.0M Aug 24 02:35 gen-word-1525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.0M Aug 24 02:35 gen-word-1530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.0M Aug 24 02:35 gen-word-1535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.0M Aug 24 02:35 gen-word-1540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.0M Aug 24 02:35 gen-word-1545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  161K Aug 24 02:35 gen-word-155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.0M Aug 24 02:35 gen-word-1550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.0M Aug 24 02:35 gen-word-1555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.0M Aug 24 02:35 gen-word-1560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.0M Aug 24 02:35 gen-word-1565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.0M Aug 24 02:35 gen-word-1570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.1M Aug 24 02:35 gen-word-1575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.1M Aug 24 02:35 gen-word-1580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.1M Aug 24 02:35 gen-word-1585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.1M Aug 24 02:35 gen-word-1590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.1M Aug 24 02:35 gen-word-1595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  163K Aug 24 02:35 gen-word-160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.1M Aug 24 02:35 gen-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.1M Aug 24 02:35 gen-word-1605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.1M Aug 24 02:35 gen-word-1610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.1M Aug 24 02:35 gen-word-1615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.2M Aug 24 02:35 gen-word-1620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.1M Aug 24 02:35 gen-word-1625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.2M Aug 24 02:35 gen-word-1630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.2M Aug 24 02:35 gen-word-1635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.2M Aug 24 02:35 gen-word-1640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.2M Aug 24 02:35 gen-word-1645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  164K Aug 24 02:35 gen-word-165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.2M Aug 24 02:35 gen-word-1650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.2M Aug 24 02:35 gen-word-1655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.2M Aug 24 02:35 gen-word-1660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.2M Aug 24 02:35 gen-word-1665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.2M Aug 24 02:35 gen-word-1670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.3M Aug 24 02:35 gen-word-1675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.3M Aug 24 02:35 gen-word-1680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.3M Aug 24 02:35 gen-word-1685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.3M Aug 24 02:35 gen-word-1690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.3M Aug 24 02:35 gen-word-1695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  173K Aug 24 02:35 gen-word-170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.3M Aug 24 02:35 gen-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.3M Aug 24 02:35 gen-word-1705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.3M Aug 24 02:35 gen-word-1710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.3M Aug 24 02:35 gen-word-1715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.3M Aug 24 02:35 gen-word-1720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.4M Aug 24 02:35 gen-word-1725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.3M Aug 24 02:35 gen-word-1730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.4M Aug 24 02:35 gen-word-1735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.4M Aug 24 02:35 gen-word-1740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.4M Aug 24 02:35 gen-word-1745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  185K Aug 24 02:35 gen-word-175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.4M Aug 24 02:35 gen-word-1750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.4M Aug 24 02:35 gen-word-1755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.4M Aug 24 02:35 gen-word-1760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.4M Aug 24 02:35 gen-word-1765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.4M Aug 24 02:35 gen-word-1770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.5M Aug 24 02:35 gen-word-1775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.4M Aug 24 02:35 gen-word-1780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.5M Aug 24 02:35 gen-word-1785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.5M Aug 24 02:35 gen-word-1790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.4M Aug 24 02:35 gen-word-1795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  185K Aug 24 02:35 gen-word-180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.5M Aug 24 02:35 gen-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.5M Aug 24 02:35 gen-word-1805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.5M Aug 24 02:35 gen-word-1810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.5M Aug 24 02:35 gen-word-1815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.5M Aug 24 02:35 gen-word-1820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.5M Aug 24 02:35 gen-word-1825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.5M Aug 24 02:35 gen-word-1830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.6M Aug 24 02:35 gen-word-1835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.6M Aug 24 02:35 gen-word-1840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.5M Aug 24 02:35 gen-word-1845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  188K Aug 24 02:35 gen-word-185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.6M Aug 24 02:35 gen-word-1850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.6M Aug 24 02:35 gen-word-1855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.6M Aug 24 02:35 gen-word-1860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.6M Aug 24 02:35 gen-word-1865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.6M Aug 24 02:35 gen-word-1870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.6M Aug 24 02:35 gen-word-1875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.6M Aug 24 02:35 gen-word-1880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.6M Aug 24 02:35 gen-word-1885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.7M Aug 24 02:35 gen-word-1890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.7M Aug 24 02:35 gen-word-1895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  197K Aug 24 02:35 gen-word-190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.7M Aug 24 02:35 gen-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.7M Aug 24 02:35 gen-word-1905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.7M Aug 24 02:35 gen-word-1910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.7M Aug 24 02:35 gen-word-1915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.7M Aug 24 02:35 gen-word-1920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.7M Aug 24 02:35 gen-word-1925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.7M Aug 24 02:35 gen-word-1930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.7M Aug 24 02:35 gen-word-1935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:35 gen-word-1940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:35 gen-word-1945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  196K Aug 24 02:35 gen-word-195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:35 gen-word-1950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:35 gen-word-1955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:35 gen-word-1960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:35 gen-word-1965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:35 gen-word-1970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:35 gen-word-1975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:35 gen-word-1980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:35 gen-word-1985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:35 gen-word-1990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.8M Aug 24 02:35 gen-word-1995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   30K Aug 24 02:35 gen-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  204K Aug 24 02:35 gen-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.9M Aug 24 02:35 gen-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.9M Aug 24 02:35 gen-word-2005-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.9M Aug 24 02:35 gen-word-2010-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.9M Aug 24 02:35 gen-word-2015-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.9M Aug 24 02:35 gen-word-2020-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.9M Aug 24 02:35 gen-word-2025-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.9M Aug 24 02:35 gen-word-2030-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  3.9M Aug 24 02:35 gen-word-2035-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.0M Aug 24 02:35 gen-word-2040-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.0M Aug 24 02:35 gen-word-2045-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  201K Aug 24 02:35 gen-word-205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.0M Aug 24 02:35 gen-word-2050-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.0M Aug 24 02:35 gen-word-2055-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.0M Aug 24 02:35 gen-word-2060-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.0M Aug 24 02:35 gen-word-2065-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.0M Aug 24 02:35 gen-word-2070-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.0M Aug 24 02:35 gen-word-2075-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.0M Aug 24 02:35 gen-word-2080-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.0M Aug 24 02:35 gen-word-2085-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.0M Aug 24 02:35 gen-word-2090-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.1M Aug 24 02:35 gen-word-2095-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  213K Aug 24 02:35 gen-word-210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.1M Aug 24 02:35 gen-word-2100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.1M Aug 24 02:35 gen-word-2105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.1M Aug 24 02:35 gen-word-2110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.1M Aug 24 02:35 gen-word-2115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.1M Aug 24 02:35 gen-word-2120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.1M Aug 24 02:35 gen-word-2125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.1M Aug 24 02:35 gen-word-2130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.1M Aug 24 02:35 gen-word-2135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.1M Aug 24 02:35 gen-word-2140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.2M Aug 24 02:35 gen-word-2145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  225K Aug 24 02:35 gen-word-215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.2M Aug 24 02:35 gen-word-2150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.2M Aug 24 02:35 gen-word-2155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.2M Aug 24 02:35 gen-word-2160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.2M Aug 24 02:35 gen-word-2165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.2M Aug 24 02:35 gen-word-2170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.2M Aug 24 02:35 gen-word-2175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.2M Aug 24 02:35 gen-word-2180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.2M Aug 24 02:35 gen-word-2185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.2M Aug 24 02:35 gen-word-2190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.3M Aug 24 02:35 gen-word-2195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  226K Aug 24 02:35 gen-word-220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.2M Aug 24 02:35 gen-word-2200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.3M Aug 24 02:35 gen-word-2205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.3M Aug 24 02:35 gen-word-2210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.3M Aug 24 02:35 gen-word-2215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.3M Aug 24 02:35 gen-word-2220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.3M Aug 24 02:35 gen-word-2225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.3M Aug 24 02:35 gen-word-2230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.3M Aug 24 02:35 gen-word-2235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.3M Aug 24 02:35 gen-word-2240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.3M Aug 24 02:35 gen-word-2245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  234K Aug 24 02:35 gen-word-225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.3M Aug 24 02:35 gen-word-2250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.4M Aug 24 02:35 gen-word-2255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.4M Aug 24 02:35 gen-word-2260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.4M Aug 24 02:35 gen-word-2265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.4M Aug 24 02:35 gen-word-2270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.4M Aug 24 02:35 gen-word-2275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.4M Aug 24 02:35 gen-word-2280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.4M Aug 24 02:35 gen-word-2285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.4M Aug 24 02:35 gen-word-2290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.4M Aug 24 02:35 gen-word-2295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  236K Aug 24 02:35 gen-word-230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.5M Aug 24 02:35 gen-word-2300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.5M Aug 24 02:35 gen-word-2305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.5M Aug 24 02:35 gen-word-2310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.5M Aug 24 02:35 gen-word-2315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.5M Aug 24 02:35 gen-word-2320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.5M Aug 24 02:35 gen-word-2325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.5M Aug 24 02:35 gen-word-2330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.5M Aug 24 02:35 gen-word-2335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.5M Aug 24 02:35 gen-word-2340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.5M Aug 24 02:35 gen-word-2345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  243K Aug 24 02:35 gen-word-235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.5M Aug 24 02:35 gen-word-2350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.5M Aug 24 02:35 gen-word-2355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.5M Aug 24 02:35 gen-word-2360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.6M Aug 24 02:35 gen-word-2365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.6M Aug 24 02:35 gen-word-2370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.6M Aug 24 02:35 gen-word-2375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.6M Aug 24 02:35 gen-word-2380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.6M Aug 24 02:35 gen-word-2385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.6M Aug 24 02:35 gen-word-2390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.6M Aug 24 02:35 gen-word-2395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  242K Aug 24 02:35 gen-word-240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.6M Aug 24 02:35 gen-word-2400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.7M Aug 24 02:35 gen-word-2405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.7M Aug 24 02:35 gen-word-2410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.6M Aug 24 02:35 gen-word-2415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.7M Aug 24 02:35 gen-word-2420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.7M Aug 24 02:35 gen-word-2425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.7M Aug 24 02:35 gen-word-2430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.7M Aug 24 02:35 gen-word-2435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.7M Aug 24 02:35 gen-word-2440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.7M Aug 24 02:35 gen-word-2445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  250K Aug 24 02:35 gen-word-245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.7M Aug 24 02:35 gen-word-2450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.7M Aug 24 02:35 gen-word-2455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.7M Aug 24 02:35 gen-word-2460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.8M Aug 24 02:35 gen-word-2465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.8M Aug 24 02:35 gen-word-2470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.8M Aug 24 02:35 gen-word-2475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.8M Aug 24 02:35 gen-word-2480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.8M Aug 24 02:35 gen-word-2485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.8M Aug 24 02:35 gen-word-2490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.8M Aug 24 02:35 gen-word-2495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   34K Aug 24 02:35 gen-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  249K Aug 24 02:35 gen-word-250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.8M Aug 24 02:35 gen-word-2500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.8M Aug 24 02:35 gen-word-2505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.8M Aug 24 02:35 gen-word-2510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.8M Aug 24 02:35 gen-word-2515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.9M Aug 24 02:35 gen-word-2520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.9M Aug 24 02:35 gen-word-2525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.9M Aug 24 02:35 gen-word-2530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.9M Aug 24 02:35 gen-word-2535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.9M Aug 24 02:35 gen-word-2540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.9M Aug 24 02:35 gen-word-2545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  254K Aug 24 02:35 gen-word-255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.9M Aug 24 02:35 gen-word-2550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.9M Aug 24 02:35 gen-word-2555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.9M Aug 24 02:35 gen-word-2560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  4.9M Aug 24 02:35 gen-word-2565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.0M Aug 24 02:35 gen-word-2570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.0M Aug 24 02:35 gen-word-2575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.0M Aug 24 02:35 gen-word-2580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.0M Aug 24 02:35 gen-word-2585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.0M Aug 24 02:35 gen-word-2590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.0M Aug 24 02:35 gen-word-2595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  264K Aug 24 02:35 gen-word-260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.0M Aug 24 02:35 gen-word-2600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.0M Aug 24 02:35 gen-word-2605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.0M Aug 24 02:35 gen-word-2610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.0M Aug 24 02:35 gen-word-2615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.0M Aug 24 02:35 gen-word-2620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.0M Aug 24 02:35 gen-word-2625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.1M Aug 24 02:35 gen-word-2630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.1M Aug 24 02:35 gen-word-2635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.1M Aug 24 02:35 gen-word-2640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.1M Aug 24 02:35 gen-word-2645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  264K Aug 24 02:35 gen-word-265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.1M Aug 24 02:35 gen-word-2650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.1M Aug 24 02:35 gen-word-2655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.1M Aug 24 02:35 gen-word-2660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.1M Aug 24 02:35 gen-word-2665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.1M Aug 24 02:35 gen-word-2670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.1M Aug 24 02:35 gen-word-2675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.1M Aug 24 02:35 gen-word-2680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.2M Aug 24 02:35 gen-word-2685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.2M Aug 24 02:35 gen-word-2690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.2M Aug 24 02:35 gen-word-2695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  275K Aug 24 02:35 gen-word-270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.2M Aug 24 02:35 gen-word-2700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.2M Aug 24 02:35 gen-word-2705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.2M Aug 24 02:35 gen-word-2710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.2M Aug 24 02:35 gen-word-2715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.3M Aug 24 02:35 gen-word-2720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.2M Aug 24 02:35 gen-word-2725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.3M Aug 24 02:35 gen-word-2730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.3M Aug 24 02:35 gen-word-2735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.3M Aug 24 02:35 gen-word-2740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.3M Aug 24 02:35 gen-word-2745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  281K Aug 24 02:35 gen-word-275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.3M Aug 24 02:35 gen-word-2750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.3M Aug 24 02:35 gen-word-2755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.3M Aug 24 02:35 gen-word-2760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.3M Aug 24 02:35 gen-word-2765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.3M Aug 24 02:35 gen-word-2770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.4M Aug 24 02:35 gen-word-2775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.3M Aug 24 02:35 gen-word-2780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.4M Aug 24 02:35 gen-word-2785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.4M Aug 24 02:35 gen-word-2790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.4M Aug 24 02:35 gen-word-2795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  279K Aug 24 02:35 gen-word-280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.4M Aug 24 02:35 gen-word-2800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.4M Aug 24 02:35 gen-word-2805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.4M Aug 24 02:35 gen-word-2810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.4M Aug 24 02:35 gen-word-2815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.4M Aug 24 02:35 gen-word-2820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.4M Aug 24 02:35 gen-word-2825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.4M Aug 24 02:35 gen-word-2830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.5M Aug 24 02:35 gen-word-2835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.5M Aug 24 02:35 gen-word-2840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.5M Aug 24 02:35 gen-word-2845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  286K Aug 24 02:35 gen-word-285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.5M Aug 24 02:35 gen-word-2850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.5M Aug 24 02:35 gen-word-2855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.5M Aug 24 02:35 gen-word-2860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.5M Aug 24 02:35 gen-word-2865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.5M Aug 24 02:35 gen-word-2870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.5M Aug 24 02:35 gen-word-2875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.5M Aug 24 02:35 gen-word-2880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.5M Aug 24 02:35 gen-word-2885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.5M Aug 24 02:35 gen-word-2890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.6M Aug 24 02:35 gen-word-2895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  295K Aug 24 02:35 gen-word-290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.6M Aug 24 02:35 gen-word-2900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.6M Aug 24 02:35 gen-word-2905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.6M Aug 24 02:35 gen-word-2910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.6M Aug 24 02:35 gen-word-2915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.6M Aug 24 02:35 gen-word-2920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.6M Aug 24 02:35 gen-word-2925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.6M Aug 24 02:35 gen-word-2930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.6M Aug 24 02:35 gen-word-2935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.7M Aug 24 02:35 gen-word-2940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.7M Aug 24 02:35 gen-word-2945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  298K Aug 24 02:35 gen-word-295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.7M Aug 24 02:35 gen-word-2950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.7M Aug 24 02:35 gen-word-2955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.7M Aug 24 02:35 gen-word-2960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.7M Aug 24 02:35 gen-word-2965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.7M Aug 24 02:35 gen-word-2970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.7M Aug 24 02:35 gen-word-2975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.7M Aug 24 02:35 gen-word-2980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.7M Aug 24 02:35 gen-word-2985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.8M Aug 24 02:35 gen-word-2990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.7M Aug 24 02:35 gen-word-2995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   39K Aug 24 02:35 gen-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  300K Aug 24 02:35 gen-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.8M Aug 24 02:35 gen-word-3000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.8M Aug 24 02:35 gen-word-3005-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.8M Aug 24 02:35 gen-word-3010-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.8M Aug 24 02:35 gen-word-3015-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.8M Aug 24 02:35 gen-word-3020-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.8M Aug 24 02:35 gen-word-3025-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.9M Aug 24 02:35 gen-word-3030-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.8M Aug 24 02:35 gen-word-3035-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.9M Aug 24 02:35 gen-word-3040-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.9M Aug 24 02:35 gen-word-3045-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  306K Aug 24 02:35 gen-word-305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.8M Aug 24 02:35 gen-word-3050-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.9M Aug 24 02:35 gen-word-3055-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.9M Aug 24 02:35 gen-word-3060-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.9M Aug 24 02:35 gen-word-3065-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.9M Aug 24 02:35 gen-word-3070-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.9M Aug 24 02:35 gen-word-3075-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.9M Aug 24 02:35 gen-word-3080-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.9M Aug 24 02:35 gen-word-3085-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.0M Aug 24 02:35 gen-word-3090-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.9M Aug 24 02:35 gen-word-3095-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  307K Aug 24 02:35 gen-word-310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.0M Aug 24 02:35 gen-word-3100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.0M Aug 24 02:35 gen-word-3105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  5.9M Aug 24 02:35 gen-word-3110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.0M Aug 24 02:35 gen-word-3115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.0M Aug 24 02:35 gen-word-3120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.0M Aug 24 02:35 gen-word-3125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.0M Aug 24 02:35 gen-word-3130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.0M Aug 24 02:35 gen-word-3135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.0M Aug 24 02:35 gen-word-3140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.1M Aug 24 02:35 gen-word-3145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  315K Aug 24 02:35 gen-word-315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.1M Aug 24 02:35 gen-word-3150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.0M Aug 24 02:35 gen-word-3155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.1M Aug 24 02:35 gen-word-3160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.1M Aug 24 02:35 gen-word-3165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.1M Aug 24 02:35 gen-word-3170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.1M Aug 24 02:35 gen-word-3175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.1M Aug 24 02:35 gen-word-3180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.1M Aug 24 02:35 gen-word-3185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.1M Aug 24 02:35 gen-word-3190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.1M Aug 24 02:35 gen-word-3195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  324K Aug 24 02:35 gen-word-320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.2M Aug 24 02:35 gen-word-3200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.1M Aug 24 02:35 gen-word-3205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.2M Aug 24 02:35 gen-word-3210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.2M Aug 24 02:35 gen-word-3215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.2M Aug 24 02:35 gen-word-3220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.2M Aug 24 02:35 gen-word-3225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.2M Aug 24 02:35 gen-word-3230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.2M Aug 24 02:35 gen-word-3235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.2M Aug 24 02:35 gen-word-3240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.2M Aug 24 02:35 gen-word-3245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  331K Aug 24 02:35 gen-word-325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.2M Aug 24 02:35 gen-word-3250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.3M Aug 24 02:35 gen-word-3255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.2M Aug 24 02:35 gen-word-3260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.3M Aug 24 02:35 gen-word-3265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.3M Aug 24 02:35 gen-word-3270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.3M Aug 24 02:35 gen-word-3275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.3M Aug 24 02:35 gen-word-3280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.3M Aug 24 02:35 gen-word-3285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.3M Aug 24 02:35 gen-word-3290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.3M Aug 24 02:35 gen-word-3295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  328K Aug 24 02:35 gen-word-330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.3M Aug 24 02:35 gen-word-3300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.3M Aug 24 02:35 gen-word-3305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.3M Aug 24 02:35 gen-word-3310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.4M Aug 24 02:35 gen-word-3315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.4M Aug 24 02:35 gen-word-3320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.4M Aug 24 02:35 gen-word-3325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.4M Aug 24 02:35 gen-word-3330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.4M Aug 24 02:35 gen-word-3335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.4M Aug 24 02:35 gen-word-3340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.4M Aug 24 02:35 gen-word-3345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  339K Aug 24 02:35 gen-word-335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.4M Aug 24 02:35 gen-word-3350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.5M Aug 24 02:35 gen-word-3355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.5M Aug 24 02:35 gen-word-3360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.5M Aug 24 02:35 gen-word-3365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.5M Aug 24 02:35 gen-word-3370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.5M Aug 24 02:35 gen-word-3375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.5M Aug 24 02:35 gen-word-3380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.5M Aug 24 02:35 gen-word-3385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.5M Aug 24 02:35 gen-word-3390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.5M Aug 24 02:35 gen-word-3395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  340K Aug 24 02:35 gen-word-340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.5M Aug 24 02:35 gen-word-3400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.5M Aug 24 02:35 gen-word-3405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.5M Aug 24 02:35 gen-word-3410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.5M Aug 24 02:35 gen-word-3415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.5M Aug 24 02:35 gen-word-3420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.6M Aug 24 02:35 gen-word-3425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.6M Aug 24 02:35 gen-word-3430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.6M Aug 24 02:35 gen-word-3435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.6M Aug 24 02:35 gen-word-3440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.6M Aug 24 02:35 gen-word-3445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  345K Aug 24 02:35 gen-word-345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.6M Aug 24 02:35 gen-word-3450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.6M Aug 24 02:35 gen-word-3455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.7M Aug 24 02:35 gen-word-3460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.7M Aug 24 02:35 gen-word-3465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.7M Aug 24 02:35 gen-word-3470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.7M Aug 24 02:35 gen-word-3475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.7M Aug 24 02:35 gen-word-3480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.7M Aug 24 02:35 gen-word-3485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.7M Aug 24 02:35 gen-word-3490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.7M Aug 24 02:35 gen-word-3495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   44K Aug 24 02:35 gen-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  353K Aug 24 02:35 gen-word-350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.7M Aug 24 02:35 gen-word-3500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.7M Aug 24 02:35 gen-word-3505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.7M Aug 24 02:35 gen-word-3510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.7M Aug 24 02:35 gen-word-3515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.7M Aug 24 02:35 gen-word-3520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.8M Aug 24 02:35 gen-word-3525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.8M Aug 24 02:35 gen-word-3530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.8M Aug 24 02:35 gen-word-3535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.8M Aug 24 02:35 gen-word-3540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.8M Aug 24 02:35 gen-word-3545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  357K Aug 24 02:35 gen-word-355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.8M Aug 24 02:35 gen-word-3550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.8M Aug 24 02:35 gen-word-3555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.8M Aug 24 02:35 gen-word-3560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.8M Aug 24 02:35 gen-word-3565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.8M Aug 24 02:35 gen-word-3570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.8M Aug 24 02:35 gen-word-3575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.9M Aug 24 02:35 gen-word-3580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.9M Aug 24 02:35 gen-word-3585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.9M Aug 24 02:35 gen-word-3590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.9M Aug 24 02:35 gen-word-3595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  355K Aug 24 02:35 gen-word-360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.9M Aug 24 02:35 gen-word-3600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.9M Aug 24 02:35 gen-word-3605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.9M Aug 24 02:35 gen-word-3610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.9M Aug 24 02:35 gen-word-3615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  6.9M Aug 24 02:35 gen-word-3620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.0M Aug 24 02:35 gen-word-3625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.0M Aug 24 02:35 gen-word-3630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.0M Aug 24 02:35 gen-word-3635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.0M Aug 24 02:35 gen-word-3640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.0M Aug 24 02:35 gen-word-3645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  373K Aug 24 02:35 gen-word-365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.0M Aug 24 02:35 gen-word-3650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.0M Aug 24 02:35 gen-word-3655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.0M Aug 24 02:35 gen-word-3660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.0M Aug 24 02:35 gen-word-3665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.0M Aug 24 02:35 gen-word-3670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.0M Aug 24 02:35 gen-word-3675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.0M Aug 24 02:35 gen-word-3680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.1M Aug 24 02:35 gen-word-3685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.1M Aug 24 02:35 gen-word-3690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.1M Aug 24 02:35 gen-word-3695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  366K Aug 24 02:35 gen-word-370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.1M Aug 24 02:35 gen-word-3700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.1M Aug 24 02:35 gen-word-3705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.1M Aug 24 02:35 gen-word-3710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.1M Aug 24 02:35 gen-word-3715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.2M Aug 24 02:35 gen-word-3720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.2M Aug 24 02:35 gen-word-3725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.1M Aug 24 02:35 gen-word-3730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.2M Aug 24 02:35 gen-word-3735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.2M Aug 24 02:35 gen-word-3740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.2M Aug 24 02:35 gen-word-3745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  380K Aug 24 02:35 gen-word-375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.2M Aug 24 02:35 gen-word-3750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.2M Aug 24 02:35 gen-word-3755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.2M Aug 24 02:35 gen-word-3760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.2M Aug 24 02:35 gen-word-3765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.2M Aug 24 02:35 gen-word-3770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.2M Aug 24 02:35 gen-word-3775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.2M Aug 24 02:35 gen-word-3780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.2M Aug 24 02:35 gen-word-3785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.3M Aug 24 02:35 gen-word-3790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.3M Aug 24 02:35 gen-word-3795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  383K Aug 24 02:35 gen-word-380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.3M Aug 24 02:35 gen-word-3800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.3M Aug 24 02:35 gen-word-3805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.3M Aug 24 02:35 gen-word-3810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.3M Aug 24 02:35 gen-word-3815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.3M Aug 24 02:35 gen-word-3820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.3M Aug 24 02:35 gen-word-3825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.3M Aug 24 02:35 gen-word-3830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.4M Aug 24 02:35 gen-word-3835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.3M Aug 24 02:35 gen-word-3840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.4M Aug 24 02:35 gen-word-3845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  391K Aug 24 02:35 gen-word-385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.4M Aug 24 02:35 gen-word-3850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.4M Aug 24 02:35 gen-word-3855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.4M Aug 24 02:35 gen-word-3860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.4M Aug 24 02:35 gen-word-3865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.4M Aug 24 02:35 gen-word-3870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.4M Aug 24 02:35 gen-word-3875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.4M Aug 24 02:35 gen-word-3880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.5M Aug 24 02:35 gen-word-3885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.5M Aug 24 02:35 gen-word-3890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.5M Aug 24 02:35 gen-word-3895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  394K Aug 24 02:35 gen-word-390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.5M Aug 24 02:35 gen-word-3900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.5M Aug 24 02:35 gen-word-3905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.5M Aug 24 02:35 gen-word-3910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.5M Aug 24 02:35 gen-word-3915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.5M Aug 24 02:35 gen-word-3920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.5M Aug 24 02:35 gen-word-3925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.5M Aug 24 02:35 gen-word-3930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.5M Aug 24 02:35 gen-word-3935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.5M Aug 24 02:35 gen-word-3940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.6M Aug 24 02:35 gen-word-3945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  400K Aug 24 02:35 gen-word-395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.6M Aug 24 02:35 gen-word-3950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.6M Aug 24 02:35 gen-word-3955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.6M Aug 24 02:35 gen-word-3960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.6M Aug 24 02:35 gen-word-3965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.6M Aug 24 02:35 gen-word-3970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.6M Aug 24 02:35 gen-word-3975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.6M Aug 24 02:35 gen-word-3980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.6M Aug 24 02:35 gen-word-3985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.6M Aug 24 02:35 gen-word-3990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.7M Aug 24 02:35 gen-word-3995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   48K Aug 24 02:35 gen-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  395K Aug 24 02:35 gen-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  7.6M Aug 24 02:35 gen-word-4000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  409K Aug 24 02:35 gen-word-405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  410K Aug 24 02:35 gen-word-410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  424K Aug 24 02:35 gen-word-415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  417K Aug 24 02:35 gen-word-420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  426K Aug 24 02:35 gen-word-425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  429K Aug 24 02:35 gen-word-430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  432K Aug 24 02:35 gen-word-435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  437K Aug 24 02:35 gen-word-440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  442K Aug 24 02:35 gen-word-445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   55K Aug 24 02:35 gen-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  446K Aug 24 02:35 gen-word-450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  447K Aug 24 02:35 gen-word-455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  461K Aug 24 02:35 gen-word-460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  459K Aug 24 02:35 gen-word-465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  465K Aug 24 02:35 gen-word-470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  472K Aug 24 02:35 gen-word-475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  468K Aug 24 02:35 gen-word-480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  491K Aug 24 02:35 gen-word-485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  487K Aug 24 02:35 gen-word-490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  488K Aug 24 02:35 gen-word-495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   15K Aug 24 02:35 gen-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   59K Aug 24 02:35 gen-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  499K Aug 24 02:35 gen-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 1002K Aug 24 02:35 gen-word-505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root 1010K Aug 24 02:35 gen-word-510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.1M Aug 24 02:35 gen-word-515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.1M Aug 24 02:35 gen-word-520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.1M Aug 24 02:35 gen-word-525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.1M Aug 24 02:35 gen-word-530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.1M Aug 24 02:35 gen-word-535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.1M Aug 24 02:35 gen-word-540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.1M Aug 24 02:35 gen-word-545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   63K Aug 24 02:35 gen-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.1M Aug 24 02:35 gen-word-550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.1M Aug 24 02:35 gen-word-555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.1M Aug 24 02:35 gen-word-560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.1M Aug 24 02:35 gen-word-565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.2M Aug 24 02:35 gen-word-570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.2M Aug 24 02:35 gen-word-575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.2M Aug 24 02:35 gen-word-580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.2M Aug 24 02:35 gen-word-585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.2M Aug 24 02:35 gen-word-590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.2M Aug 24 02:35 gen-word-595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   68K Aug 24 02:35 gen-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.2M Aug 24 02:35 gen-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.2M Aug 24 02:35 gen-word-605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.2M Aug 24 02:35 gen-word-610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.2M Aug 24 02:35 gen-word-615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.3M Aug 24 02:35 gen-word-620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.3M Aug 24 02:35 gen-word-625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.2M Aug 24 02:35 gen-word-630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.3M Aug 24 02:35 gen-word-635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.3M Aug 24 02:35 gen-word-640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.3M Aug 24 02:35 gen-word-645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   74K Aug 24 02:35 gen-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.3M Aug 24 02:35 gen-word-650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.3M Aug 24 02:35 gen-word-655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.3M Aug 24 02:35 gen-word-660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.3M Aug 24 02:35 gen-word-665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.3M Aug 24 02:35 gen-word-670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.3M Aug 24 02:35 gen-word-675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.4M Aug 24 02:35 gen-word-680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.4M Aug 24 02:35 gen-word-685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.4M Aug 24 02:35 gen-word-690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.4M Aug 24 02:35 gen-word-695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   77K Aug 24 02:35 gen-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.4M Aug 24 02:35 gen-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.4M Aug 24 02:35 gen-word-705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.4M Aug 24 02:35 gen-word-710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.4M Aug 24 02:35 gen-word-715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.4M Aug 24 02:35 gen-word-720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.4M Aug 24 02:35 gen-word-725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.5M Aug 24 02:35 gen-word-730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.5M Aug 24 02:35 gen-word-735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.5M Aug 24 02:35 gen-word-740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.5M Aug 24 02:35 gen-word-745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   83K Aug 24 02:35 gen-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.5M Aug 24 02:35 gen-word-750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.5M Aug 24 02:35 gen-word-755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.5M Aug 24 02:35 gen-word-760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.5M Aug 24 02:35 gen-word-765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.5M Aug 24 02:35 gen-word-770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.5M Aug 24 02:35 gen-word-775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.5M Aug 24 02:35 gen-word-780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.6M Aug 24 02:35 gen-word-785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.6M Aug 24 02:35 gen-word-790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.6M Aug 24 02:35 gen-word-795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   88K Aug 24 02:35 gen-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.6M Aug 24 02:35 gen-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.6M Aug 24 02:35 gen-word-805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.6M Aug 24 02:35 gen-word-810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.6M Aug 24 02:35 gen-word-815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.6M Aug 24 02:35 gen-word-820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.6M Aug 24 02:35 gen-word-825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.6M Aug 24 02:35 gen-word-830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.6M Aug 24 02:35 gen-word-835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.7M Aug 24 02:35 gen-word-840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.7M Aug 24 02:35 gen-word-845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   94K Aug 24 02:35 gen-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.7M Aug 24 02:35 gen-word-850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.7M Aug 24 02:35 gen-word-855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.7M Aug 24 02:35 gen-word-860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.7M Aug 24 02:35 gen-word-865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.7M Aug 24 02:35 gen-word-870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.7M Aug 24 02:35 gen-word-875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.8M Aug 24 02:35 gen-word-880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.7M Aug 24 02:35 gen-word-885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.8M Aug 24 02:35 gen-word-890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.8M Aug 24 02:35 gen-word-895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   99K Aug 24 02:35 gen-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.8M Aug 24 02:35 gen-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.8M Aug 24 02:35 gen-word-905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.8M Aug 24 02:35 gen-word-910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.8M Aug 24 02:35 gen-word-915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.8M Aug 24 02:35 gen-word-920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.8M Aug 24 02:35 gen-word-925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.8M Aug 24 02:35 gen-word-930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.9M Aug 24 02:35 gen-word-935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.9M Aug 24 02:35 gen-word-940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.9M Aug 24 02:35 gen-word-945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  103K Aug 24 02:35 gen-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.9M Aug 24 02:35 gen-word-950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.9M Aug 24 02:35 gen-word-955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.9M Aug 24 02:35 gen-word-960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.9M Aug 24 02:35 gen-word-965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.9M Aug 24 02:35 gen-word-970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.9M Aug 24 02:35 gen-word-975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.9M Aug 24 02:35 gen-word-980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.9M Aug 24 02:35 gen-word-985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  1.9M Aug 24 02:35 gen-word-990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  2.0M Aug 24 02:35 gen-word-995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   50K Aug 24 02:35 shuffle-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   30K Aug 24 02:35 shuffle-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1005-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1010-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1015-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1020-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-1025-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1030-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-1035-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1040-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1045-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   29K Aug 24 02:35 shuffle-word-105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-1050-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1055-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-1060-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1065-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-1070-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1075-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-1080-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1085-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1090-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-1095-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   29K Aug 24 02:35 shuffle-word-110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-1115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-1135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:35 shuffle-word-115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-1155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:35 shuffle-word-1160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-1170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-1195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:35 shuffle-word-120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-1210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-1240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   29K Aug 24 02:35 shuffle-word-125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-1255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:35 shuffle-word-1260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-1270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-1290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-1295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:35 shuffle-word-130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-1305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  516K Aug 24 02:35 shuffle-word-1330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-1335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:35 shuffle-word-135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-1365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:35 shuffle-word-145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-1460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-1475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-1495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   45K Aug 24 02:35 shuffle-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:35 shuffle-word-150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-1505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-1520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-1525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:35 shuffle-word-155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-1570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-1575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-1580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-1585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-1595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:35 shuffle-word-160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-1645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:35 shuffle-word-165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  517K Aug 24 02:35 shuffle-word-1685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-1695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   29K Aug 24 02:35 shuffle-word-170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-1740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:35 shuffle-word-175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:35 shuffle-word-180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-1805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-1820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-1825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   26K Aug 24 02:35 shuffle-word-185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-1860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-1880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-1895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-1905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-1925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  517K Aug 24 02:35 shuffle-word-1935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   29K Aug 24 02:35 shuffle-word-195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-1955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-1965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-1970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-1975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-1980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-1985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-1990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-1995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   37K Aug 24 02:35 shuffle-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-2005-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-2010-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-2015-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-2020-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-2025-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-2030-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-2035-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-2040-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-2045-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-2050-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-2055-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-2060-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-2065-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-2070-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-2075-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-2080-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-2085-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-2090-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-2095-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:35 shuffle-word-210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-2100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-2105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-2110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-2115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-2120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-2125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-2130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-2135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-2140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-2145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-2150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-2155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-2160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-2165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-2170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-2175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-2180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-2185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-2190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-2195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-2200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-2205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-2210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-2215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-2220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-2225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-2230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-2235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-2240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-2245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-2250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-2255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-2260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-2265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-2270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-2275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-2280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-2285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-2290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  517K Aug 24 02:35 shuffle-word-2295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:35 shuffle-word-230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-2300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-2305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-2310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-2315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-2320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-2325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-2330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-2335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-2340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-2345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-2350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-2355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-2360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-2365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-2370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-2375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-2380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-2385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  516K Aug 24 02:35 shuffle-word-2390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-2395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-2400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-2405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-2410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-2415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-2420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-2425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-2430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  516K Aug 24 02:35 shuffle-word-2435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-2440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  517K Aug 24 02:35 shuffle-word-2445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  517K Aug 24 02:35 shuffle-word-2450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-2455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  517K Aug 24 02:35 shuffle-word-2460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  518K Aug 24 02:35 shuffle-word-2465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-2470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-2475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  517K Aug 24 02:35 shuffle-word-2480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-2485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-2490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-2495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   37K Aug 24 02:35 shuffle-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-2500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  514K Aug 24 02:35 shuffle-word-2505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  513K Aug 24 02:35 shuffle-word-2510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  515K Aug 24 02:35 shuffle-word-2515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  513K Aug 24 02:35 shuffle-word-2520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  516K Aug 24 02:35 shuffle-word-2525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  517K Aug 24 02:35 shuffle-word-2530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  516K Aug 24 02:35 shuffle-word-2535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  516K Aug 24 02:35 shuffle-word-2540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  512K Aug 24 02:35 shuffle-word-2545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:35 shuffle-word-255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  514K Aug 24 02:35 shuffle-word-2550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  514K Aug 24 02:35 shuffle-word-2555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  515K Aug 24 02:35 shuffle-word-2560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  517K Aug 24 02:35 shuffle-word-2565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  515K Aug 24 02:35 shuffle-word-2570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  517K Aug 24 02:35 shuffle-word-2575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  511K Aug 24 02:35 shuffle-word-2580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  517K Aug 24 02:35 shuffle-word-2585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  513K Aug 24 02:35 shuffle-word-2590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  517K Aug 24 02:35 shuffle-word-2595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  517K Aug 24 02:35 shuffle-word-2600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  510K Aug 24 02:35 shuffle-word-2605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  511K Aug 24 02:35 shuffle-word-2610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  510K Aug 24 02:35 shuffle-word-2615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  511K Aug 24 02:35 shuffle-word-2620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  512K Aug 24 02:35 shuffle-word-2625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  512K Aug 24 02:35 shuffle-word-2630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  514K Aug 24 02:35 shuffle-word-2635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  510K Aug 24 02:35 shuffle-word-2640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  510K Aug 24 02:35 shuffle-word-2645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  514K Aug 24 02:35 shuffle-word-2650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  512K Aug 24 02:35 shuffle-word-2655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  511K Aug 24 02:35 shuffle-word-2660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  510K Aug 24 02:35 shuffle-word-2670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  510K Aug 24 02:35 shuffle-word-2675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  511K Aug 24 02:35 shuffle-word-2685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  510K Aug 24 02:35 shuffle-word-2690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  510K Aug 24 02:35 shuffle-word-2695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  510K Aug 24 02:35 shuffle-word-2705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-2710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  511K Aug 24 02:35 shuffle-word-2715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  510K Aug 24 02:35 shuffle-word-2720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  510K Aug 24 02:35 shuffle-word-2735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  510K Aug 24 02:35 shuffle-word-2740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-2745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-2780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-2795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:35 shuffle-word-280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-2815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-2820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-2830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-2840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:35 shuffle-word-285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-2860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:35 shuffle-word-290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-2915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-2935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-2950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-2960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-2975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-2995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   33K Aug 24 02:35 shuffle-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   26K Aug 24 02:35 shuffle-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3005-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3010-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3015-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3020-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3025-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3030-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3035-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3040-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3045-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   26K Aug 24 02:35 shuffle-word-305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3050-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3055-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3060-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3065-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3070-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3075-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3080-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3085-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3090-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3095-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   26K Aug 24 02:35 shuffle-word-320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:35 shuffle-word-325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:35 shuffle-word-335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   26K Aug 24 02:35 shuffle-word-340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   26K Aug 24 02:35 shuffle-word-345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   33K Aug 24 02:35 shuffle-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:35 shuffle-word-350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:35 shuffle-word-360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   26K Aug 24 02:35 shuffle-word-365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   26K Aug 24 02:35 shuffle-word-375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:35 shuffle-word-395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  508K Aug 24 02:35 shuffle-word-3975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-3995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   30K Aug 24 02:35 shuffle-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  509K Aug 24 02:35 shuffle-word-4000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:35 shuffle-word-440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   26K Aug 24 02:35 shuffle-word-445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   33K Aug 24 02:35 shuffle-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   26K Aug 24 02:35 shuffle-word-450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   28K Aug 24 02:35 shuffle-word-460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   27K Aug 24 02:35 shuffle-word-480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   26K Aug 24 02:35 shuffle-word-485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   26K Aug 24 02:35 shuffle-word-490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   26K Aug 24 02:35 shuffle-word-495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   80K Aug 24 02:35 shuffle-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   31K Aug 24 02:35 shuffle-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   26K Aug 24 02:35 shuffle-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:35 shuffle-word-505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:35 shuffle-word-515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:35 shuffle-word-520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:35 shuffle-word-525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:35 shuffle-word-530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  528K Aug 24 02:35 shuffle-word-535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  528K Aug 24 02:35 shuffle-word-540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:35 shuffle-word-545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   32K Aug 24 02:35 shuffle-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  530K Aug 24 02:35 shuffle-word-550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:35 shuffle-word-555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:35 shuffle-word-560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:35 shuffle-word-570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:35 shuffle-word-575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:35 shuffle-word-585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:35 shuffle-word-595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   29K Aug 24 02:35 shuffle-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:35 shuffle-word-605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:35 shuffle-word-610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:35 shuffle-word-615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:35 shuffle-word-625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:35 shuffle-word-640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   30K Aug 24 02:35 shuffle-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:35 shuffle-word-650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:35 shuffle-word-665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:35 shuffle-word-670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   30K Aug 24 02:35 shuffle-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:35 shuffle-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  528K Aug 24 02:35 shuffle-word-705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:35 shuffle-word-725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   30K Aug 24 02:35 shuffle-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  528K Aug 24 02:35 shuffle-word-755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:35 shuffle-word-770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:35 shuffle-word-785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:35 shuffle-word-790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   30K Aug 24 02:35 shuffle-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  527K Aug 24 02:35 shuffle-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:35 shuffle-word-810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  526K Aug 24 02:35 shuffle-word-815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  517K Aug 24 02:35 shuffle-word-820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:35 shuffle-word-835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   29K Aug 24 02:35 shuffle-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  525K Aug 24 02:35 shuffle-word-860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  519K Aug 24 02:35 shuffle-word-895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   29K Aug 24 02:35 shuffle-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  521K Aug 24 02:35 shuffle-word-915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   29K Aug 24 02:35 shuffle-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  523K Aug 24 02:35 shuffle-word-960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  522K Aug 24 02:35 shuffle-word-975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  524K Aug 24 02:35 shuffle-word-990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root  520K Aug 24 02:35 shuffle-word-995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r--  1 root root   12K Aug 24 02:35 word-2-count.jsonl\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%script bash\n",
+    "\n",
+    "########################################\n",
+    "# Generate the required jsonl dataset\n",
+    "########################################\n",
+    "\n",
+    "# Reset the dataset dir\n",
+    "mkdir -p ../dataset\n",
+    "rm -rf ../dataset/*.jsonl\n",
+    "\n",
+    "# Generate the various datasets\n",
+    "echo \"## Generating word repetition dataset ##\"\n",
+    "\n",
+    "#\n",
+    "# We reduce the training set for <= 500 words - and shift the focus upwards\n",
+    "# (the 5-500 loop passes 100 / 1 to the generators, versus 200 / 20 for the 505-4000 loop below)\n",
+    "#\n",
+    "python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-2-count.jsonl 2 100 &\n",
+    "for i in {5..500..5} \n",
+    "do\n",
+    "    python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 100 & \n",
+    "    python ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 1 & \n",
+    "done\n",
+    "\n",
+    "#\n",
+    "# Ramping up the 505 - 4000 words dataset\n",
+    "# \n",
+    "for i in {505..4000..5} \n",
+    "do\n",
+    "    python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 200 & \n",
+    "    python ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 20 & \n",
+    "done\n",
+    "\n",
+    "wait\n",
+    "echo \"## Done ##\"\n",
+    "\n",
+    "ls -alh ../dataset/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "dfb59378",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:35:24.116572Z",
+     "iopub.status.busy": "2023-08-24T02:35:24.116209Z",
+     "iopub.status.idle": "2023-08-24T02:35:38.806028Z",
+     "shell.execute_reply": "2023-08-24T02:35:38.804853Z"
+    },
+    "papermill": {
+     "duration": 14.969171,
+     "end_time": "2023-08-24T02:35:38.807866",
+     "exception": false,
+     "start_time": "2023-08-24T02:35:23.838695",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.11/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/v5base-mem-template.yaml', '--trainer.logger.init_args.name=v5-hs2x-L6-D4096-E0.1 - Mem-Tune ctx-4k (train-ctx=4k, deepspeed_stage_2_offload)', '--trainer.strategy=deepspeed_stage_2_offload', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-hs2x-L6-D4096-E0_1-mem-ctx-4k/', '--model.lr_init=3e-4', '--model.lr_final=1e-4', '--data.max_token_size=4096', '--model.ctx_len=4096', '--model.bptt_learning_range=1', '--model.load_model=../model/v5-hs2x-L6-D4096-E0_1-mem-ctx-1k.pth'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/v5base-mem-template.yaml', '--trainer.logger.init_args.name=v5-hs2x-L6-D4096-E0.1 - Mem-Tune ctx-4k (train-ctx=4k, deepspeed_stage_2_offload)', '--trainer.strategy=deepspeed_stage_2_offload', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-hs2x-L6-D4096-E0_1-mem-ctx-4k/', '--model.lr_init=3e-4', '--model.lr_final=1e-4', '--data.max_token_size=4096', '--model.ctx_len=4096', '--model.bptt_learning_range=1', '--model.load_model=../model/v5-hs2x-L6-D4096-E0_1-mem-ctx-1k.pth'].\r\n",
+      "  rank_zero_warn(\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.11/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 2123144594\r\n",
+      "  rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n",
+      "Global seed set to 2123144594\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: wandb version 0.15.8 is available!  To upgrade, please run:\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m:  $ pip install wandb --upgrade\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.4\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20230824_023530-bubbie44\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mv5-hs2x-L6-D4096-E0.1 - Mem-Tune ctx-4k (train-ctx=4k, deepspeed_stage_2_offload)\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/bubbie44\u001b[0m\r\n",
+      "Traceback (most recent call last):\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/lightning_trainer.py\", line 254, in <module>\r\n",
+      "    cli_main()\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/lightning_trainer.py\", line 233, in cli_main\r\n",
+      "    LightningCLI(\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/lightning/pytorch/cli.py\", line 350, in __init__\r\n",
+      "    self.instantiate_classes()\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/lightning/pytorch/cli.py\", line 499, in instantiate_classes\r\n",
+      "    self.config_init = self.parser.instantiate_classes(self.config)\r\n",
+      "                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n",
+      "    cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n",
+      "          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_core.py\", line 1128, in instantiate_classes\r\n",
+      "    cfg[subcommand] = subparser.instantiate_classes(cfg[subcommand], instantiate_groups=instantiate_groups)\r\n",
+      "                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n",
+      "    cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n",
+      "          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_core.py\", line 1122, in instantiate_classes\r\n",
+      "    component.instantiate_class(component, cfg)\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_signatures.py\", line 551, in group_instantiate_class\r\n",
+      "    parent[key] = group.group_class(**value)\r\n",
+      "                  ^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/src/model.py\", line 553, in __init__\r\n",
+      "    raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n",
+      "ValueError: load_model file '../model/v5-hs2x-L6-D4096-E0_1-mem-ctx-1k.pth' does not exist\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5-hs2x-L6-D4096-E0.1 - Mem-Tune ctx-4k (train-ctx=4k, deepspeed_stage_2_offload)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/bubbie44\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230824_023530-bubbie44/logs\u001b[0m\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Start the finetune model training\n",
+    "!cd \"{TRAINER_DIR}\" && \\\n",
+    "    export WANDB_MODE=\"{WANDB_MODE}\" && \\\n",
+    "    export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
+    "    python lightning_trainer.py fit \\\n",
+    "        -c \"{NOTEBOOK_DIR}/v5base-mem-template.yaml\" \\\n",
+    "        --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Tune ctx-4k (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n",
+    "        --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n",
+    "        --trainer.devices=\"{GPU_DEVICES}\"  \\\n",
+    "        --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-mem-ctx-4k/\" \\\n",
+    "        --model.lr_init=3e-4 \\\n",
+    "        --model.lr_final=1e-4 \\\n",
+    "        --data.max_token_size=4096 \\\n",
+    "        --model.ctx_len=4096 \\\n",
+    "        --model.bptt_learning_range=1 \\\n",
+    "        --model.load_model=\"../model/{FILENAME_PREFIX}-mem-ctx-1k.pth\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "c1bf01de",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:35:39.373965Z",
+     "iopub.status.busy": "2023-08-24T02:35:39.372678Z",
+     "iopub.status.idle": "2023-08-24T02:35:42.092861Z",
+     "shell.execute_reply": "2023-08-24T02:35:42.092034Z"
+    },
+    "papermill": {
+     "duration": 3.007219,
+     "end_time": "2023-08-24T02:35:42.094547",
+     "exception": false,
+     "start_time": "2023-08-24T02:35:39.087328",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Traceback (most recent call last):\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/export_checkpoint.py\", line 651, in <module>\r\n",
+      "    convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n",
+      "    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n",
+      "                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n",
+      "    raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n",
+      "ValueError: Unable to find 'latest' file at ../checkpoint/v5-hs2x-L6-D4096-E0_1-mem-ctx-4k/last.ckpt/latest\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ls: cannot access '../model/v5-hs2x-L6-D4096-E0_1-mem-ctx-4k.pth': No such file or directory\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Let's export the model from the checkpoint\n",
+    "!cd \"{TRAINER_DIR}\" && \\\n",
+    "    python export_checkpoint.py \\\n",
+    "        \"../checkpoint/{FILENAME_PREFIX}-mem-ctx-4k/last.ckpt\" \\\n",
+    "        \"../model/{FILENAME_PREFIX}-mem-ctx-4k.pth\" \"bf16\"\n",
+    "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-mem-ctx-4k.pth\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "71c87f6f",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:35:42.690964Z",
+     "iopub.status.busy": "2023-08-24T02:35:42.690451Z",
+     "iopub.status.idle": "2023-08-24T02:35:47.414129Z",
+     "shell.execute_reply": "2023-08-24T02:35:47.413309Z"
+    },
+    "papermill": {
+     "duration": 5.003479,
+     "end_time": "2023-08-24T02:35:47.416170",
+     "exception": false,
+     "start_time": "2023-08-24T02:35:42.412691",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
+      "Traceback (most recent call last):\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/../memory_script/eval_v5_memory_guided.py\", line 366, in <module>\r\n",
+      "    asyncio.run(main_function())\r\n",
+      "  File \"/usr/lib/python3.11/asyncio/runners.py\", line 190, in run\r\n",
+      "    return runner.run(main)\r\n",
+      "           ^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/lib/python3.11/asyncio/runners.py\", line 118, in run\r\n",
+      "    return self._loop.run_until_complete(task)\r\n",
+      "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/lib/python3.11/asyncio/base_events.py\", line 653, in run_until_complete\r\n",
+      "    return future.result()\r\n",
+      "           ^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/../memory_script/eval_v5_memory_guided.py\", line 58, in main_function\r\n",
+      "    model = SimpleRWKV(model_path, device=\"cuda\")\r\n",
+      "            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 1378, in __init__\r\n",
+      "    self.model = RWKV(**model_config)\r\n",
+      "                 ^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 553, in __init__\r\n",
+      "    raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n",
+      "ValueError: load_model file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/model/v5-hs2x-L6-D4096-E0_1-mem-ctx-4k.pth' does not exist\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Let's do a quick memory test\n",
+    "!export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
+    "        python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-4k.pth\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f5d61ac7",
+   "metadata": {
+    "papermill": {
+     "duration": 0.267179,
+     "end_time": "2023-08-24T02:35:47.952985",
+     "exception": false,
+     "start_time": "2023-08-24T02:35:47.685806",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "## Tune 6 : Ramping up the ctx size (8192), memory training\n",
+    "\n",
+    "- Tune 6: Large ctx size (8192), Scaling up!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "6f6b5515",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:35:48.561780Z",
+     "iopub.status.busy": "2023-08-24T02:35:48.561571Z",
+     "iopub.status.idle": "2023-08-24T02:35:56.032318Z",
+     "shell.execute_reply": "2023-08-24T02:35:56.029867Z"
+    },
+    "papermill": {
+     "duration": 7.81188,
+     "end_time": "2023-08-24T02:35:56.104013",
+     "exception": false,
+     "start_time": "2023-08-24T02:35:48.292133",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "## Generating word reptition dataset ##\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2 max words, 50 samples - at ../dataset/word-2-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 563 samples (1 token repeat) - 5 max words - at ../dataset/shuffle-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 107 samples (1 token repeat) - 25 max words - at ../dataset/shuffle-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5 max words, 50 samples - at ../dataset/gen-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 10 max words, 50 samples - at ../dataset/gen-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 55 max words, 50 samples - at ../dataset/gen-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 134 samples (1 token repeat) - 20 max words - at ../dataset/shuffle-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 50 samples (1 token repeat) - 55 max words - at ../dataset/shuffle-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 25 max words, 50 samples - at ../dataset/gen-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 39 samples (1 token repeat) - 70 max words - at ../dataset/shuffle-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 35 max words, 50 samples - at ../dataset/gen-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 177 samples (1 token repeat) - 15 max words - at ../dataset/shuffle-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 20 max words, 50 samples - at ../dataset/gen-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 65 max words, 50 samples - at ../dataset/gen-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 40 max words, 50 samples - at ../dataset/gen-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 30 max words, 50 samples - at ../dataset/gen-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 70 max words, 50 samples - at ../dataset/gen-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 60 max words, 50 samples - at ../dataset/gen-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 45 max words, 50 samples - at ../dataset/gen-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 41 samples (1 token repeat) - 65 max words - at ../dataset/shuffle-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 88 samples (1 token repeat) - 30 max words - at ../dataset/shuffle-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 51 samples (1 token repeat) - 50 max words - at ../dataset/shuffle-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 67 samples (1 token repeat) - 40 max words - at ../dataset/shuffle-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 43 samples (1 token repeat) - 60 max words - at ../dataset/shuffle-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 268 samples (1 token repeat) - 10 max words - at ../dataset/shuffle-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (1 token repeat) - 45 max words - at ../dataset/shuffle-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 15 max words, 50 samples - at ../dataset/gen-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 19 samples (1 token repeat) - 115 max words - at ../dataset/shuffle-word-115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 30 samples (1 token repeat) - 85 max words - at ../dataset/shuffle-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 34 samples (1 token repeat) - 80 max words - at ../dataset/shuffle-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 76 samples (1 token repeat) - 35 max words - at ../dataset/shuffle-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 270 max words - at ../dataset/shuffle-word-270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 18 samples (1 token repeat) - 125 max words - at ../dataset/shuffle-word-125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 260 max words - at ../dataset/shuffle-word-260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 18 samples (1 token repeat) - 120 max words - at ../dataset/shuffle-word-120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 100 max words, 50 samples - at ../dataset/gen-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 290 max words - at ../dataset/shuffle-word-290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 155 max words, 50 samples - at ../dataset/gen-word-155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 145 max words, 50 samples - at ../dataset/gen-word-145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 255 max words - at ../dataset/shuffle-word-255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 14 samples (1 token repeat) - 165 max words - at ../dataset/shuffle-word-165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 105 max words, 50 samples - at ../dataset/gen-word-105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 80 max words, 50 samples - at ../dataset/gen-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 275 max words, 50 samples - at ../dataset/gen-word-275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 325 max words, 50 samples - at ../dataset/gen-word-325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 33 samples (1 token repeat) - 75 max words - at ../dataset/shuffle-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 300 max words - at ../dataset/shuffle-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 240 max words - at ../dataset/shuffle-word-240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (1 token repeat) - 110 max words - at ../dataset/shuffle-word-110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 255 max words, 50 samples - at ../dataset/gen-word-255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 13 samples (1 token repeat) - 185 max words - at ../dataset/shuffle-word-185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 50 max words, 50 samples - at ../dataset/gen-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 250 max words - at ../dataset/shuffle-word-250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 265 max words - at ../dataset/shuffle-word-265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 17 samples (1 token repeat) - 145 max words - at ../dataset/shuffle-word-145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 230 max words - at ../dataset/shuffle-word-230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 215 max words - at ../dataset/shuffle-word-215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 190 max words, 50 samples - at ../dataset/gen-word-190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 195 max words, 50 samples - at ../dataset/gen-word-195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 150 max words, 50 samples - at ../dataset/gen-word-150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 245 max words, 50 samples - at ../dataset/gen-word-245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 130 max words, 50 samples - at ../dataset/gen-word-130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 90 max words, 50 samples - at ../dataset/gen-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 220 max words - at ../dataset/shuffle-word-220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 16 samples (1 token repeat) - 150 max words - at ../dataset/shuffle-word-150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 215 max words, 50 samples - at ../dataset/gen-word-215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 29 samples (1 token repeat) - 95 max words - at ../dataset/shuffle-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (1 token repeat) - 105 max words - at ../dataset/shuffle-word-105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 380 max words - at ../dataset/shuffle-word-380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 75 max words, 50 samples - at ../dataset/gen-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 445 max words - at ../dataset/shuffle-word-445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 17 samples (1 token repeat) - 130 max words - at ../dataset/shuffle-word-130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 135 max words, 50 samples - at ../dataset/gen-word-135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 15 samples (1 token repeat) - 170 max words - at ../dataset/shuffle-word-170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 210 max words - at ../dataset/shuffle-word-210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 280 max words - at ../dataset/shuffle-word-280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 13 samples (1 token repeat) - 175 max words - at ../dataset/shuffle-word-175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 490 max words - at ../dataset/shuffle-word-490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 115 max words, 50 samples - at ../dataset/gen-word-115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 17 samples (1 token repeat) - 135 max words - at ../dataset/shuffle-word-135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 25 samples (1 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 400 max words - at ../dataset/shuffle-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 140 max words, 50 samples - at ../dataset/gen-word-140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 415 max words - at ../dataset/shuffle-word-415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 245 max words - at ../dataset/shuffle-word-245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 205 max words, 50 samples - at ../dataset/gen-word-205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 480 max words - at ../dataset/shuffle-word-480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 160 max words, 50 samples - at ../dataset/gen-word-160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 95 max words, 50 samples - at ../dataset/gen-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 13 samples (1 token repeat) - 180 max words - at ../dataset/shuffle-word-180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 240 max words, 50 samples - at ../dataset/gen-word-240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 225 max words - at ../dataset/shuffle-word-225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 350 max words, 50 samples - at ../dataset/gen-word-350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 595 max words - at ../dataset/shuffle-word-595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 170 max words, 50 samples - at ../dataset/gen-word-170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 29 samples (1 token repeat) - 90 max words - at ../dataset/shuffle-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 14 samples (1 token repeat) - 155 max words - at ../dataset/shuffle-word-155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 485 max words, 50 samples - at ../dataset/gen-word-485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 235 max words - at ../dataset/shuffle-word-235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 15 samples (1 token repeat) - 160 max words - at ../dataset/shuffle-word-160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 285 max words - at ../dataset/shuffle-word-285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 470 max words, 50 samples - at ../dataset/gen-word-470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 235 max words, 50 samples - at ../dataset/gen-word-235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 450 max words - at ../dataset/shuffle-word-450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 440 max words, 50 samples - at ../dataset/gen-word-440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 435 max words - at ../dataset/shuffle-word-435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 175 max words, 50 samples - at ../dataset/gen-word-175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 225 max words, 50 samples - at ../dataset/gen-word-225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 375 max words - at ../dataset/shuffle-word-375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 230 max words, 50 samples - at ../dataset/gen-word-230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 285 max words, 50 samples - at ../dataset/gen-word-285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 260 max words, 50 samples - at ../dataset/gen-word-260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 310 max words - at ../dataset/shuffle-word-310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 595 max words, 50 samples - at ../dataset/gen-word-595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 210 max words, 50 samples - at ../dataset/gen-word-210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 445 max words, 50 samples - at ../dataset/gen-word-445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 570 max words - at ../dataset/shuffle-word-570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 420 max words - at ../dataset/shuffle-word-420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 545 max words, 50 samples - at ../dataset/gen-word-545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 345 max words, 50 samples - at ../dataset/gen-word-345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 330 max words - at ../dataset/shuffle-word-330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 535 max words, 50 samples - at ../dataset/gen-word-535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 980 max words - at ../dataset/shuffle-word-980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 330 max words, 50 samples - at ../dataset/gen-word-330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 590 max words, 50 samples - at ../dataset/gen-word-590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 580 max words, 50 samples - at ../dataset/gen-word-580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 450 max words, 50 samples - at ../dataset/gen-word-450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 600 max words - at ../dataset/shuffle-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 300 max words, 50 samples - at ../dataset/gen-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 370 max words, 50 samples - at ../dataset/gen-word-370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 430 max words - at ../dataset/shuffle-word-430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1800 max words - at ../dataset/shuffle-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 555 max words - at ../dataset/shuffle-word-555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 665 max words - at ../dataset/shuffle-word-665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 550 max words - at ../dataset/shuffle-word-550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 570 max words, 50 samples - at ../dataset/gen-word-570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 405 max words - at ../dataset/shuffle-word-405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 615 max words, 50 samples - at ../dataset/gen-word-615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1500 max words - at ../dataset/shuffle-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 845 max words, 50 samples - at ../dataset/gen-word-845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 410 max words - at ../dataset/shuffle-word-410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 730 max words - at ../dataset/shuffle-word-730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 785 max words - at ../dataset/shuffle-word-785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 860 max words - at ../dataset/shuffle-word-860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 16 samples (1 token repeat) - 140 max words - at ../dataset/shuffle-word-140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 565 max words, 50 samples - at ../dataset/gen-word-565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1100 max words - at ../dataset/shuffle-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5100 max words - at ../dataset/shuffle-word-5100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 680 max words - at ../dataset/shuffle-word-680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 295 max words, 50 samples - at ../dataset/gen-word-295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 940 max words, 50 samples - at ../dataset/gen-word-940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 765 max words - at ../dataset/shuffle-word-765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 680 max words, 50 samples - at ../dataset/gen-word-680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 625 max words, 50 samples - at ../dataset/gen-word-625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 825 max words - at ../dataset/shuffle-word-825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 670 max words, 50 samples - at ../dataset/gen-word-670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 925 max words - at ../dataset/shuffle-word-925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 835 max words - at ../dataset/shuffle-word-835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 775 max words, 50 samples - at ../dataset/gen-word-775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 30 samples (20 token repeat) - 2600 max words - at ../dataset/shuffle-word-2600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 390 max words - at ../dataset/shuffle-word-390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 790 max words, 50 samples - at ../dataset/gen-word-790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 695 max words - at ../dataset/shuffle-word-695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 850 max words - at ../dataset/shuffle-word-850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 125 max words, 50 samples - at ../dataset/gen-word-125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 345 max words - at ../dataset/shuffle-word-345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 650 max words - at ../dataset/shuffle-word-650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 85 max words, 50 samples - at ../dataset/gen-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 11 samples (1 token repeat) - 205 max words - at ../dataset/shuffle-word-205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 610 max words - at ../dataset/shuffle-word-610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 820 max words - at ../dataset/shuffle-word-820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 315 max words, 50 samples - at ../dataset/gen-word-315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 945 max words, 50 samples - at ../dataset/gen-word-945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 905 max words - at ../dataset/shuffle-word-905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 760 max words, 50 samples - at ../dataset/gen-word-760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 650 max words, 50 samples - at ../dataset/gen-word-650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2200 max words - at ../dataset/shuffle-word-2200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 375 max words, 50 samples - at ../dataset/gen-word-375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 820 max words, 50 samples - at ../dataset/gen-word-820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 365 max words, 50 samples - at ../dataset/gen-word-365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 630 max words, 50 samples - at ../dataset/gen-word-630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2400 max words - at ../dataset/shuffle-word-2400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 645 max words, 50 samples - at ../dataset/gen-word-645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 185 max words, 50 samples - at ../dataset/gen-word-185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 605 max words - at ../dataset/shuffle-word-605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 635 max words - at ../dataset/shuffle-word-635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 310 max words, 50 samples - at ../dataset/gen-word-310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 995 max words - at ../dataset/shuffle-word-995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 575 max words, 50 samples - at ../dataset/gen-word-575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 365 max words - at ../dataset/shuffle-word-365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 895 max words, 50 samples - at ../dataset/gen-word-895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 795 max words, 50 samples - at ../dataset/gen-word-795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 655 max words, 50 samples - at ../dataset/gen-word-655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 685 max words, 50 samples - at ../dataset/gen-word-685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 860 max words, 50 samples - at ../dataset/gen-word-860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 505 max words - at ../dataset/shuffle-word-505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7500 max words - at ../dataset/shuffle-word-7500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 795 max words - at ../dataset/shuffle-word-795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 295 max words - at ../dataset/shuffle-word-295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 910 max words, 50 samples - at ../dataset/gen-word-910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 315 max words - at ../dataset/shuffle-word-315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 900 max words, 50 samples - at ../dataset/gen-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 965 max words, 50 samples - at ../dataset/gen-word-965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4500 max words - at ../dataset/shuffle-word-4500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 14 samples (1 token repeat) - 190 max words - at ../dataset/shuffle-word-190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 660 max words - at ../dataset/shuffle-word-660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 945 max words - at ../dataset/shuffle-word-945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 810 max words, 50 samples - at ../dataset/gen-word-810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 270 max words, 50 samples - at ../dataset/gen-word-270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 935 max words, 50 samples - at ../dataset/gen-word-935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 940 max words - at ../dataset/shuffle-word-940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 14 samples (1 token repeat) - 200 max words - at ../dataset/shuffle-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 700 max words - at ../dataset/shuffle-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 935 max words - at ../dataset/shuffle-word-935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 355 max words, 50 samples - at ../dataset/gen-word-355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 220 max words, 50 samples - at ../dataset/gen-word-220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 200 max words, 50 samples - at ../dataset/gen-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1600 max words - at ../dataset/shuffle-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 265 max words, 50 samples - at ../dataset/gen-word-265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 340 max words, 50 samples - at ../dataset/gen-word-340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 250 max words, 50 samples - at ../dataset/gen-word-250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 720 max words, 50 samples - at ../dataset/gen-word-720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6900 max words - at ../dataset/shuffle-word-6900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4300 max words - at ../dataset/shuffle-word-4300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 110 max words, 50 samples - at ../dataset/gen-word-110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 790 max words - at ../dataset/shuffle-word-790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 425 max words - at ../dataset/shuffle-word-425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 930 max words, 50 samples - at ../dataset/gen-word-930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 320 max words - at ../dataset/shuffle-word-320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 420 max words, 50 samples - at ../dataset/gen-word-420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 885 max words, 50 samples - at ../dataset/gen-word-885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4400 max words - at ../dataset/shuffle-word-4400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 630 max words - at ../dataset/shuffle-word-630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 600 max words, 50 samples - at ../dataset/gen-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 500 max words, 50 samples - at ../dataset/gen-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 335 max words - at ../dataset/shuffle-word-335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 290 max words, 50 samples - at ../dataset/gen-word-290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 475 max words - at ../dataset/shuffle-word-475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 180 max words, 50 samples - at ../dataset/gen-word-180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 465 max words - at ../dataset/shuffle-word-465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 165 max words, 50 samples - at ../dataset/gen-word-165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 990 max words, 50 samples - at ../dataset/gen-word-990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 705 max words - at ../dataset/shuffle-word-705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 360 max words - at ../dataset/shuffle-word-360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 735 max words - at ../dataset/shuffle-word-735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 655 max words - at ../dataset/shuffle-word-655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5300 max words - at ../dataset/shuffle-word-5300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 690 max words - at ../dataset/shuffle-word-690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 660 max words, 50 samples - at ../dataset/gen-word-660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 865 max words, 50 samples - at ../dataset/gen-word-865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3200 max words - at ../dataset/shuffle-word-3200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 370 max words - at ../dataset/shuffle-word-370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4000 max words - at ../dataset/shuffle-word-4000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4100 max words - at ../dataset/shuffle-word-4100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 740 max words, 50 samples - at ../dataset/gen-word-740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 8000 max words - at ../dataset/shuffle-word-8000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 350 max words - at ../dataset/shuffle-word-350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 865 max words - at ../dataset/shuffle-word-865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 780 max words - at ../dataset/shuffle-word-780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 585 max words, 50 samples - at ../dataset/gen-word-585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 675 max words, 50 samples - at ../dataset/gen-word-675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 775 max words - at ../dataset/shuffle-word-775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7200 max words - at ../dataset/shuffle-word-7200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 385 max words - at ../dataset/shuffle-word-385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 990 max words - at ../dataset/shuffle-word-990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 1000 max words - at ../dataset/shuffle-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 580 max words - at ../dataset/shuffle-word-580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 385 max words, 50 samples - at ../dataset/gen-word-385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 38 samples (20 token repeat) - 2500 max words - at ../dataset/shuffle-word-2500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7300 max words - at ../dataset/shuffle-word-7300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 335 max words, 50 samples - at ../dataset/gen-word-335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 305 max words, 50 samples - at ../dataset/gen-word-305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6700 max words - at ../dataset/shuffle-word-6700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 455 max words - at ../dataset/shuffle-word-455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 415 max words, 50 samples - at ../dataset/gen-word-415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 275 max words - at ../dataset/shuffle-word-275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 395 max words - at ../dataset/shuffle-word-395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 810 max words - at ../dataset/shuffle-word-810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 395 max words, 50 samples - at ../dataset/gen-word-395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 755 max words - at ../dataset/shuffle-word-755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5800 max words - at ../dataset/shuffle-word-5800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7900 max words - at ../dataset/shuffle-word-7900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 870 max words - at ../dataset/shuffle-word-870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 8 samples (1 token repeat) - 305 max words - at ../dataset/shuffle-word-305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 875 max words, 50 samples - at ../dataset/gen-word-875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 980 max words, 50 samples - at ../dataset/gen-word-980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 845 max words - at ../dataset/shuffle-word-845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 530 max words, 50 samples - at ../dataset/gen-word-530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 455 max words, 50 samples - at ../dataset/gen-word-455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 435 max words, 50 samples - at ../dataset/gen-word-435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 880 max words, 50 samples - at ../dataset/gen-word-880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 560 max words - at ../dataset/shuffle-word-560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 770 max words, 50 samples - at ../dataset/gen-word-770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 360 max words, 50 samples - at ../dataset/gen-word-360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 24 samples (20 token repeat) - 2700 max words - at ../dataset/shuffle-word-2700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 705 max words, 50 samples - at ../dataset/gen-word-705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1200 max words - at ../dataset/shuffle-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 400 max words, 50 samples - at ../dataset/gen-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 800 max words, 50 samples - at ../dataset/gen-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7600 max words - at ../dataset/shuffle-word-7600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3800 max words - at ../dataset/shuffle-word-3800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 280 max words, 50 samples - at ../dataset/gen-word-280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 590 max words - at ../dataset/shuffle-word-590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 745 max words - at ../dataset/shuffle-word-745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 720 max words - at ../dataset/shuffle-word-720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 690 max words, 50 samples - at ../dataset/gen-word-690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 745 max words, 50 samples - at ../dataset/gen-word-745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 470 max words - at ../dataset/shuffle-word-470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 750 max words - at ../dataset/shuffle-word-750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 960 max words, 50 samples - at ../dataset/gen-word-960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 870 max words, 50 samples - at ../dataset/gen-word-870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1700 max words - at ../dataset/shuffle-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 430 max words, 50 samples - at ../dataset/gen-word-430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7800 max words - at ../dataset/shuffle-word-7800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 520 max words - at ../dataset/shuffle-word-520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 540 max words, 50 samples - at ../dataset/gen-word-540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 625 max words - at ../dataset/shuffle-word-625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 320 max words, 50 samples - at ../dataset/gen-word-320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 8 samples (1 token repeat) - 325 max words - at ../dataset/shuffle-word-325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 515 max words - at ../dataset/shuffle-word-515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 340 max words - at ../dataset/shuffle-word-340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 550 max words, 50 samples - at ../dataset/gen-word-550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 485 max words - at ../dataset/shuffle-word-485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 890 max words - at ../dataset/shuffle-word-890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 465 max words, 50 samples - at ../dataset/gen-word-465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 610 max words, 50 samples - at ../dataset/gen-word-610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 915 max words, 50 samples - at ../dataset/gen-word-915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 425 max words, 50 samples - at ../dataset/gen-word-425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5900 max words - at ../dataset/shuffle-word-5900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 405 max words, 50 samples - at ../dataset/gen-word-405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3100 max words - at ../dataset/shuffle-word-3100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 900 max words - at ../dataset/shuffle-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 605 max words, 50 samples - at ../dataset/gen-word-605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 495 max words - at ../dataset/shuffle-word-495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 825 max words, 50 samples - at ../dataset/gen-word-825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 985 max words - at ../dataset/shuffle-word-985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3400 max words - at ../dataset/shuffle-word-3400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 850 max words, 50 samples - at ../dataset/gen-word-850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5000 max words - at ../dataset/shuffle-word-5000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 500 max words - at ../dataset/shuffle-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 735 max words, 50 samples - at ../dataset/gen-word-735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 525 max words, 50 samples - at ../dataset/gen-word-525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4900 max words - at ../dataset/shuffle-word-4900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 770 max words - at ../dataset/shuffle-word-770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5600 max words - at ../dataset/shuffle-word-5600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 855 max words, 50 samples - at ../dataset/gen-word-855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 620 max words, 50 samples - at ../dataset/gen-word-620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 800 max words - at ../dataset/shuffle-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 355 max words - at ../dataset/shuffle-word-355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4700 max words - at ../dataset/shuffle-word-4700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 510 max words - at ../dataset/shuffle-word-510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 755 max words, 50 samples - at ../dataset/gen-word-755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 805 max words, 50 samples - at ../dataset/gen-word-805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 955 max words - at ../dataset/shuffle-word-955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 675 max words - at ../dataset/shuffle-word-675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 495 max words, 50 samples - at ../dataset/gen-word-495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 765 max words, 50 samples - at ../dataset/gen-word-765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 410 max words, 50 samples - at ../dataset/gen-word-410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 670 max words - at ../dataset/shuffle-word-670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 730 max words, 50 samples - at ../dataset/gen-word-730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 390 max words, 50 samples - at ../dataset/gen-word-390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5700 max words - at ../dataset/shuffle-word-5700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 615 max words - at ../dataset/shuffle-word-615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6100 max words - at ../dataset/shuffle-word-6100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 905 max words, 50 samples - at ../dataset/gen-word-905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 910 max words - at ../dataset/shuffle-word-910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 815 max words - at ../dataset/shuffle-word-815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 535 max words - at ../dataset/shuffle-word-535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 685 max words - at ../dataset/shuffle-word-685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 620 max words - at ../dataset/shuffle-word-620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 970 max words - at ../dataset/shuffle-word-970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 575 max words - at ../dataset/shuffle-word-575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5200 max words - at ../dataset/shuffle-word-5200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 565 max words - at ../dataset/shuffle-word-565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 640 max words - at ../dataset/shuffle-word-640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 855 max words - at ../dataset/shuffle-word-855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 950 max words - at ../dataset/shuffle-word-950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 530 max words - at ../dataset/shuffle-word-530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 525 max words - at ../dataset/shuffle-word-525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 970 max words, 50 samples - at ../dataset/gen-word-970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 640 max words, 50 samples - at ../dataset/gen-word-640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 960 max words - at ../dataset/shuffle-word-960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 545 max words - at ../dataset/shuffle-word-545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 490 max words, 50 samples - at ../dataset/gen-word-490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 840 max words - at ../dataset/shuffle-word-840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 915 max words - at ../dataset/shuffle-word-915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 515 max words, 50 samples - at ../dataset/gen-word-515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 830 max words - at ../dataset/shuffle-word-830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 440 max words - at ../dataset/shuffle-word-440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 785 max words, 50 samples - at ../dataset/gen-word-785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3300 max words - at ../dataset/shuffle-word-3300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 815 max words, 50 samples - at ../dataset/gen-word-815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 955 max words, 50 samples - at ../dataset/gen-word-955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5400 max words - at ../dataset/shuffle-word-5400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 725 max words - at ../dataset/shuffle-word-725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 895 max words - at ../dataset/shuffle-word-895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 780 max words, 50 samples - at ../dataset/gen-word-780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 665 max words, 50 samples - at ../dataset/gen-word-665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 700 max words, 50 samples - at ../dataset/gen-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1900 max words - at ../dataset/shuffle-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 875 max words - at ../dataset/shuffle-word-875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 715 max words - at ../dataset/shuffle-word-715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 41 samples (20 token repeat) - 1400 max words - at ../dataset/shuffle-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 830 max words, 50 samples - at ../dataset/gen-word-830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4600 max words - at ../dataset/shuffle-word-4600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6600 max words - at ../dataset/shuffle-word-6600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 505 max words, 50 samples - at ../dataset/gen-word-505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 740 max words - at ../dataset/shuffle-word-740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 760 max words - at ../dataset/shuffle-word-760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 750 max words, 50 samples - at ../dataset/gen-word-750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 585 max words - at ../dataset/shuffle-word-585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 555 max words, 50 samples - at ../dataset/gen-word-555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3900 max words - at ../dataset/shuffle-word-3900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 995 max words, 50 samples - at ../dataset/gen-word-995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 710 max words - at ../dataset/shuffle-word-710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 460 max words, 50 samples - at ../dataset/gen-word-460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 880 max words - at ../dataset/shuffle-word-880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6400 max words - at ../dataset/shuffle-word-6400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 885 max words - at ../dataset/shuffle-word-885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7000 max words - at ../dataset/shuffle-word-7000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 930 max words - at ../dataset/shuffle-word-930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 645 max words - at ../dataset/shuffle-word-645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4200 max words - at ../dataset/shuffle-word-4200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3000 max words - at ../dataset/shuffle-word-3000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 975 max words - at ../dataset/shuffle-word-975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 835 max words, 50 samples - at ../dataset/gen-word-835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 14 samples (1 token repeat) - 195 max words - at ../dataset/shuffle-word-195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 480 max words, 50 samples - at ../dataset/gen-word-480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 475 max words, 50 samples - at ../dataset/gen-word-475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 710 max words, 50 samples - at ../dataset/gen-word-710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 725 max words, 50 samples - at ../dataset/gen-word-725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2800 max words - at ../dataset/shuffle-word-2800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4800 max words - at ../dataset/shuffle-word-4800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 920 max words - at ../dataset/shuffle-word-920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 965 max words - at ../dataset/shuffle-word-965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6800 max words - at ../dataset/shuffle-word-6800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6000 max words - at ../dataset/shuffle-word-6000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 635 max words, 50 samples - at ../dataset/gen-word-635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5500 max words - at ../dataset/shuffle-word-5500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 120 max words, 50 samples - at ../dataset/gen-word-120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2300 max words - at ../dataset/shuffle-word-2300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7100 max words - at ../dataset/shuffle-word-7100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 50 samples (20 token repeat) - 1300 max words - at ../dataset/shuffle-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7400 max words - at ../dataset/shuffle-word-7400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 695 max words, 50 samples - at ../dataset/gen-word-695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 460 max words - at ../dataset/shuffle-word-460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3500 max words - at ../dataset/shuffle-word-3500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 510 max words, 50 samples - at ../dataset/gen-word-510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2100 max words - at ../dataset/shuffle-word-2100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 520 max words, 50 samples - at ../dataset/gen-word-520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6300 max words - at ../dataset/shuffle-word-6300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2900 max words - at ../dataset/shuffle-word-2900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2000 max words - at ../dataset/shuffle-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 715 max words, 50 samples - at ../dataset/gen-word-715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 560 max words, 50 samples - at ../dataset/gen-word-560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7700 max words - at ../dataset/shuffle-word-7700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 975 max words, 50 samples - at ../dataset/gen-word-975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3700 max words - at ../dataset/shuffle-word-3700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 925 max words, 50 samples - at ../dataset/gen-word-925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 540 max words - at ../dataset/shuffle-word-540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 985 max words, 50 samples - at ../dataset/gen-word-985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 890 max words, 50 samples - at ../dataset/gen-word-890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 950 max words, 50 samples - at ../dataset/gen-word-950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3600 max words - at ../dataset/shuffle-word-3600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 840 max words, 50 samples - at ../dataset/gen-word-840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1000 max words, 50 samples - at ../dataset/gen-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 805 max words - at ../dataset/shuffle-word-805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 920 max words, 50 samples - at ../dataset/gen-word-920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6500 max words - at ../dataset/shuffle-word-6500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 380 max words, 50 samples - at ../dataset/gen-word-380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6200 max words - at ../dataset/shuffle-word-6200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1200 max words, 2000 samples - at ../dataset/gen-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1300 max words, 2000 samples - at ../dataset/gen-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1500 max words, 2000 samples - at ../dataset/gen-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1600 max words, 2000 samples - at ../dataset/gen-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1100 max words, 2000 samples - at ../dataset/gen-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1900 max words, 2000 samples - at ../dataset/gen-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1800 max words, 2000 samples - at ../dataset/gen-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1700 max words, 2000 samples - at ../dataset/gen-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2000 max words, 2000 samples - at ../dataset/gen-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1400 max words, 2000 samples - at ../dataset/gen-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2100 max words, 2000 samples - at ../dataset/gen-word-2100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2300 max words, 2000 samples - at ../dataset/gen-word-2300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2400 max words, 2000 samples - at ../dataset/gen-word-2400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2600 max words, 2000 samples - at ../dataset/gen-word-2600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2700 max words, 2000 samples - at ../dataset/gen-word-2700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2500 max words, 2000 samples - at ../dataset/gen-word-2500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2900 max words, 2000 samples - at ../dataset/gen-word-2900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3100 max words, 2000 samples - at ../dataset/gen-word-3100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2200 max words, 2000 samples - at ../dataset/gen-word-2200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3300 max words, 2000 samples - at ../dataset/gen-word-3300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3600 max words, 2000 samples - at ../dataset/gen-word-3600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3200 max words, 2000 samples - at ../dataset/gen-word-3200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2800 max words, 2000 samples - at ../dataset/gen-word-2800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3500 max words, 2000 samples - at ../dataset/gen-word-3500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3000 max words, 2000 samples - at ../dataset/gen-word-3000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3400 max words, 2000 samples - at ../dataset/gen-word-3400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3900 max words, 2000 samples - at ../dataset/gen-word-3900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3700 max words, 2000 samples - at ../dataset/gen-word-3700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3800 max words, 2000 samples - at ../dataset/gen-word-3800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4100 max words, 2000 samples - at ../dataset/gen-word-4100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4300 max words, 2000 samples - at ../dataset/gen-word-4300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4500 max words, 2000 samples - at ../dataset/gen-word-4500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4200 max words, 2000 samples - at ../dataset/gen-word-4200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4000 max words, 2000 samples - at ../dataset/gen-word-4000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5000 max words, 2000 samples - at ../dataset/gen-word-5000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4600 max words, 2000 samples - at ../dataset/gen-word-4600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4400 max words, 2000 samples - at ../dataset/gen-word-4400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4900 max words, 2000 samples - at ../dataset/gen-word-4900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5300 max words, 2000 samples - at ../dataset/gen-word-5300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4800 max words, 2000 samples - at ../dataset/gen-word-4800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5100 max words, 2000 samples - at ../dataset/gen-word-5100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5400 max words, 2000 samples - at ../dataset/gen-word-5400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5700 max words, 2000 samples - at ../dataset/gen-word-5700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5200 max words, 2000 samples - at ../dataset/gen-word-5200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5500 max words, 2000 samples - at ../dataset/gen-word-5500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6200 max words, 2000 samples - at ../dataset/gen-word-6200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6400 max words, 2000 samples - at ../dataset/gen-word-6400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5600 max words, 2000 samples - at ../dataset/gen-word-5600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5800 max words, 2000 samples - at ../dataset/gen-word-5800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5900 max words, 2000 samples - at ../dataset/gen-word-5900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7300 max words, 2000 samples - at ../dataset/gen-word-7300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4700 max words, 2000 samples - at ../dataset/gen-word-4700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6100 max words, 2000 samples - at ../dataset/gen-word-6100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6800 max words, 2000 samples - at ../dataset/gen-word-6800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6300 max words, 2000 samples - at ../dataset/gen-word-6300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6000 max words, 2000 samples - at ../dataset/gen-word-6000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7600 max words, 2000 samples - at ../dataset/gen-word-7600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7100 max words, 2000 samples - at ../dataset/gen-word-7100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6500 max words, 2000 samples - at ../dataset/gen-word-6500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7000 max words, 2000 samples - at ../dataset/gen-word-7000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6600 max words, 2000 samples - at ../dataset/gen-word-6600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7500 max words, 2000 samples - at ../dataset/gen-word-7500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7400 max words, 2000 samples - at ../dataset/gen-word-7400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7700 max words, 2000 samples - at ../dataset/gen-word-7700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6900 max words, 2000 samples - at ../dataset/gen-word-6900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7200 max words, 2000 samples - at ../dataset/gen-word-7200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7800 max words, 2000 samples - at ../dataset/gen-word-7800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6700 max words, 2000 samples - at ../dataset/gen-word-6700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7900 max words, 2000 samples - at ../dataset/gen-word-7900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 8000 max words, 2000 samples - at ../dataset/gen-word-8000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "## Done ##\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "total 6.1G\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 9.7K Aug 24 02:35 gen-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  53K Aug 24 02:35 gen-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 492K Aug 24 02:35 gen-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  57K Aug 24 02:35 gen-word-105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  60K Aug 24 02:35 gen-word-110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  22M Aug 24 02:35 gen-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  62K Aug 24 02:35 gen-word-115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  65K Aug 24 02:35 gen-word-120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  24M Aug 24 02:35 gen-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  69K Aug 24 02:35 gen-word-125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  69K Aug 24 02:35 gen-word-130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  25M Aug 24 02:35 gen-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  70K Aug 24 02:35 gen-word-135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  73K Aug 24 02:35 gen-word-140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27M Aug 24 02:35 gen-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  74K Aug 24 02:35 gen-word-145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  13K Aug 24 02:35 gen-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  79K Aug 24 02:35 gen-word-150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  29M Aug 24 02:35 gen-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  82K Aug 24 02:35 gen-word-155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  83K Aug 24 02:35 gen-word-160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  31M Aug 24 02:35 gen-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  87K Aug 24 02:35 gen-word-165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  90K Aug 24 02:35 gen-word-170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  33M Aug 24 02:35 gen-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  90K Aug 24 02:35 gen-word-175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  91K Aug 24 02:35 gen-word-180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  35M Aug 24 02:35 gen-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  94K Aug 24 02:35 gen-word-185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  94K Aug 24 02:35 gen-word-190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  37M Aug 24 02:35 gen-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  99K Aug 24 02:35 gen-word-195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  15K Aug 24 02:35 gen-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 102K Aug 24 02:35 gen-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  39M Aug 24 02:35 gen-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 106K Aug 24 02:35 gen-word-205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 110K Aug 24 02:35 gen-word-210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  41M Aug 24 02:35 gen-word-2100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 109K Aug 24 02:35 gen-word-215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 111K Aug 24 02:35 gen-word-220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  42M Aug 24 02:35 gen-word-2200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 116K Aug 24 02:35 gen-word-225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 117K Aug 24 02:35 gen-word-230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  44M Aug 24 02:35 gen-word-2300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 118K Aug 24 02:35 gen-word-235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 121K Aug 24 02:35 gen-word-240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  46M Aug 24 02:35 gen-word-2400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 125K Aug 24 02:35 gen-word-245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  18K Aug 24 02:35 gen-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 129K Aug 24 02:35 gen-word-250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  48M Aug 24 02:35 gen-word-2500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 129K Aug 24 02:35 gen-word-255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 130K Aug 24 02:35 gen-word-260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  50M Aug 24 02:35 gen-word-2600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 132K Aug 24 02:35 gen-word-265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 139K Aug 24 02:35 gen-word-270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  52M Aug 24 02:35 gen-word-2700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 138K Aug 24 02:35 gen-word-275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 138K Aug 24 02:35 gen-word-280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  54M Aug 24 02:35 gen-word-2800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 146K Aug 24 02:35 gen-word-285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 149K Aug 24 02:35 gen-word-290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  56M Aug 24 02:35 gen-word-2900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 146K Aug 24 02:35 gen-word-295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  20K Aug 24 02:35 gen-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 155K Aug 24 02:35 gen-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  58M Aug 24 02:35 gen-word-3000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 152K Aug 24 02:35 gen-word-305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 158K Aug 24 02:35 gen-word-310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  60M Aug 24 02:35 gen-word-3100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 157K Aug 24 02:35 gen-word-315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 164K Aug 24 02:35 gen-word-320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  62M Aug 24 02:35 gen-word-3200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 163K Aug 24 02:35 gen-word-325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 167K Aug 24 02:35 gen-word-330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  63M Aug 24 02:35 gen-word-3300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 167K Aug 24 02:35 gen-word-335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 170K Aug 24 02:35 gen-word-340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  65M Aug 24 02:35 gen-word-3400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 171K Aug 24 02:35 gen-word-345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  23K Aug 24 02:35 gen-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 179K Aug 24 02:35 gen-word-350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  67M Aug 24 02:35 gen-word-3500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 178K Aug 24 02:35 gen-word-355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 178K Aug 24 02:35 gen-word-360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  69M Aug 24 02:35 gen-word-3600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 188K Aug 24 02:35 gen-word-365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 187K Aug 24 02:35 gen-word-370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  71M Aug 24 02:35 gen-word-3700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 185K Aug 24 02:35 gen-word-375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 195K Aug 24 02:35 gen-word-380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  73M Aug 24 02:35 gen-word-3800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 193K Aug 24 02:35 gen-word-385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 190K Aug 24 02:35 gen-word-390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  75M Aug 24 02:35 gen-word-3900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 195K Aug 24 02:35 gen-word-395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  25K Aug 24 02:35 gen-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 200K Aug 24 02:35 gen-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  77M Aug 24 02:35 gen-word-4000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 201K Aug 24 02:35 gen-word-405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 206K Aug 24 02:35 gen-word-410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  79M Aug 24 02:35 gen-word-4100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 206K Aug 24 02:35 gen-word-415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 216K Aug 24 02:35 gen-word-420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  80M Aug 24 02:35 gen-word-4200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 214K Aug 24 02:35 gen-word-425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 210K Aug 24 02:35 gen-word-430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  82M Aug 24 02:35 gen-word-4300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 221K Aug 24 02:35 gen-word-435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 219K Aug 24 02:35 gen-word-440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  84M Aug 24 02:35 gen-word-4400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 222K Aug 24 02:35 gen-word-445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 gen-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 224K Aug 24 02:35 gen-word-450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  86M Aug 24 02:35 gen-word-4500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 227K Aug 24 02:35 gen-word-455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 225K Aug 24 02:35 gen-word-460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  88M Aug 24 02:35 gen-word-4600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 232K Aug 24 02:35 gen-word-465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 229K Aug 24 02:35 gen-word-470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  90M Aug 24 02:35 gen-word-4700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 240K Aug 24 02:35 gen-word-475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 239K Aug 24 02:35 gen-word-480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  92M Aug 24 02:35 gen-word-4800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 244K Aug 24 02:35 gen-word-485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 243K Aug 24 02:35 gen-word-490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  94M Aug 24 02:35 gen-word-4900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 248K Aug 24 02:35 gen-word-495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 7.1K Aug 24 02:35 gen-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  30K Aug 24 02:35 gen-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 243K Aug 24 02:35 gen-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  96M Aug 24 02:35 gen-word-5000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 249K Aug 24 02:35 gen-word-505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 252K Aug 24 02:35 gen-word-510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  97M Aug 24 02:35 gen-word-5100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 255K Aug 24 02:35 gen-word-515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 254K Aug 24 02:35 gen-word-520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  99M Aug 24 02:35 gen-word-5200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 263K Aug 24 02:35 gen-word-525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 264K Aug 24 02:35 gen-word-530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 101M Aug 24 02:35 gen-word-5300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 270K Aug 24 02:35 gen-word-535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 261K Aug 24 02:35 gen-word-540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 103M Aug 24 02:35 gen-word-5400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 269K Aug 24 02:35 gen-word-545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  32K Aug 24 02:35 gen-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 273K Aug 24 02:35 gen-word-550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 105M Aug 24 02:35 gen-word-5500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 274K Aug 24 02:35 gen-word-555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 274K Aug 24 02:35 gen-word-560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 107M Aug 24 02:35 gen-word-5600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 276K Aug 24 02:35 gen-word-565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 277K Aug 24 02:35 gen-word-570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 109M Aug 24 02:35 gen-word-5700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 280K Aug 24 02:35 gen-word-575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 286K Aug 24 02:35 gen-word-580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 111M Aug 24 02:35 gen-word-5800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 287K Aug 24 02:35 gen-word-585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 288K Aug 24 02:35 gen-word-590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 113M Aug 24 02:35 gen-word-5900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 294K Aug 24 02:35 gen-word-595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  34K Aug 24 02:35 gen-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 295K Aug 24 02:35 gen-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 115M Aug 24 02:35 gen-word-6000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 297K Aug 24 02:35 gen-word-605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 308K Aug 24 02:35 gen-word-610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 117M Aug 24 02:35 gen-word-6100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 300K Aug 24 02:35 gen-word-615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 307K Aug 24 02:35 gen-word-620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 118M Aug 24 02:35 gen-word-6200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 312K Aug 24 02:35 gen-word-625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 315K Aug 24 02:35 gen-word-630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 120M Aug 24 02:35 gen-word-6300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 318K Aug 24 02:35 gen-word-635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 319K Aug 24 02:35 gen-word-640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 122M Aug 24 02:35 gen-word-6400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 319K Aug 24 02:35 gen-word-645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  37K Aug 24 02:35 gen-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 318K Aug 24 02:35 gen-word-650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 124M Aug 24 02:35 gen-word-6500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 326K Aug 24 02:35 gen-word-655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 321K Aug 24 02:35 gen-word-660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 126M Aug 24 02:35 gen-word-6600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 331K Aug 24 02:35 gen-word-665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 328K Aug 24 02:35 gen-word-670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 128M Aug 24 02:35 gen-word-6700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 334K Aug 24 02:35 gen-word-675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 336K Aug 24 02:35 gen-word-680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 130M Aug 24 02:35 gen-word-6800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 335K Aug 24 02:35 gen-word-685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 339K Aug 24 02:35 gen-word-690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 132M Aug 24 02:35 gen-word-6900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 338K Aug 24 02:35 gen-word-695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  40K Aug 24 02:35 gen-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 341K Aug 24 02:35 gen-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 134M Aug 24 02:35 gen-word-7000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 349K Aug 24 02:35 gen-word-705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 349K Aug 24 02:35 gen-word-710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 136M Aug 24 02:35 gen-word-7100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 355K Aug 24 02:35 gen-word-715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 353K Aug 24 02:35 gen-word-720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 137M Aug 24 02:35 gen-word-7200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 352K Aug 24 02:35 gen-word-725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 360K Aug 24 02:35 gen-word-730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 139M Aug 24 02:35 gen-word-7300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 361K Aug 24 02:35 gen-word-735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 370K Aug 24 02:35 gen-word-740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 141M Aug 24 02:35 gen-word-7400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 379K Aug 24 02:35 gen-word-745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  42K Aug 24 02:35 gen-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 364K Aug 24 02:35 gen-word-750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 143M Aug 24 02:35 gen-word-7500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 379K Aug 24 02:35 gen-word-755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 379K Aug 24 02:35 gen-word-760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 145M Aug 24 02:35 gen-word-7600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 374K Aug 24 02:35 gen-word-765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 377K Aug 24 02:35 gen-word-770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 147M Aug 24 02:35 gen-word-7700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 377K Aug 24 02:35 gen-word-775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 386K Aug 24 02:35 gen-word-780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 149M Aug 24 02:35 gen-word-7800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 386K Aug 24 02:35 gen-word-785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 386K Aug 24 02:35 gen-word-790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 151M Aug 24 02:35 gen-word-7900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 397K Aug 24 02:35 gen-word-795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  45K Aug 24 02:35 gen-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 394K Aug 24 02:35 gen-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 153M Aug 24 02:35 gen-word-8000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 394K Aug 24 02:35 gen-word-805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 394K Aug 24 02:35 gen-word-810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 401K Aug 24 02:35 gen-word-815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 405K Aug 24 02:35 gen-word-820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 403K Aug 24 02:35 gen-word-825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 408K Aug 24 02:35 gen-word-830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 412K Aug 24 02:35 gen-word-835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 408K Aug 24 02:35 gen-word-840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 415K Aug 24 02:35 gen-word-845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  47K Aug 24 02:35 gen-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 423K Aug 24 02:35 gen-word-850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 422K Aug 24 02:35 gen-word-855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 429K Aug 24 02:35 gen-word-860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 426K Aug 24 02:35 gen-word-865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 421K Aug 24 02:35 gen-word-870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 427K Aug 24 02:35 gen-word-875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 435K Aug 24 02:35 gen-word-880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 435K Aug 24 02:35 gen-word-885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 440K Aug 24 02:35 gen-word-890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 441K Aug 24 02:35 gen-word-895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  50K Aug 24 02:35 gen-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 440K Aug 24 02:35 gen-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 440K Aug 24 02:35 gen-word-905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 447K Aug 24 02:35 gen-word-910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 447K Aug 24 02:35 gen-word-915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 454K Aug 24 02:35 gen-word-920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 457K Aug 24 02:35 gen-word-925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 455K Aug 24 02:35 gen-word-930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 461K Aug 24 02:35 gen-word-935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 463K Aug 24 02:35 gen-word-940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 457K Aug 24 02:35 gen-word-945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  51K Aug 24 02:35 gen-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 464K Aug 24 02:35 gen-word-950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 474K Aug 24 02:35 gen-word-955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 472K Aug 24 02:35 gen-word-960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 477K Aug 24 02:35 gen-word-965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 472K Aug 24 02:35 gen-word-970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 472K Aug 24 02:35 gen-word-975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 479K Aug 24 02:35 gen-word-980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 484K Aug 24 02:35 gen-word-985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 486K Aug 24 02:35 gen-word-990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 494K Aug 24 02:35 gen-word-995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  53K Aug 24 02:35 shuffle-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  29K Aug 24 02:35 shuffle-word-105-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-110-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 520K Aug 24 02:35 shuffle-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-115-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-120-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 521K Aug 24 02:35 shuffle-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-125-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-130-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 522K Aug 24 02:35 shuffle-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-135-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-140-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 521K Aug 24 02:35 shuffle-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-145-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  43K Aug 24 02:35 shuffle-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-150-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 521K Aug 24 02:35 shuffle-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-155-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-160-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 522K Aug 24 02:35 shuffle-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-165-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-170-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 521K Aug 24 02:35 shuffle-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-175-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  29K Aug 24 02:35 shuffle-word-180-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 519K Aug 24 02:35 shuffle-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-185-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-190-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 519K Aug 24 02:35 shuffle-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  29K Aug 24 02:35 shuffle-word-195-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  39K Aug 24 02:35 shuffle-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 520K Aug 24 02:35 shuffle-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-205-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-210-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 523K Aug 24 02:35 shuffle-word-2100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-215-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-220-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 520K Aug 24 02:35 shuffle-word-2200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-225-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-230-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 522K Aug 24 02:35 shuffle-word-2300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-235-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-240-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 520K Aug 24 02:35 shuffle-word-2400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-245-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  38K Aug 24 02:35 shuffle-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-250-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 520K Aug 24 02:35 shuffle-word-2500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-255-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-260-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 515K Aug 24 02:35 shuffle-word-2600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-265-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-270-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 512K Aug 24 02:35 shuffle-word-2700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-275-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-280-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-2800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-285-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-290-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-2900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-295-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  31K Aug 24 02:35 shuffle-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-3000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-305-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-310-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-3100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-315-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-320-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-3200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-325-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-330-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-3300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-335-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-340-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-3400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-345-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  33K Aug 24 02:35 shuffle-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-350-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Aug 24 02:35 shuffle-word-3500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-355-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-360-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-3600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-365-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-370-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-3700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-375-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-380-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-3800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-385-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-390-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-3900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-395-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  35K Aug 24 02:35 shuffle-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-4000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-405-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-410-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-4100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-415-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-420-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-4200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-425-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-430-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-4300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-435-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-440-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-4400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-445-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  33K Aug 24 02:35 shuffle-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-450-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-4500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-455-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-460-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Aug 24 02:35 shuffle-word-4600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-465-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-470-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-4700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-475-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-480-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-4800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-485-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-490-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Aug 24 02:35 shuffle-word-4900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-495-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  83K Aug 24 02:35 shuffle-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  32K Aug 24 02:35 shuffle-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-5000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-505-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-510-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-5100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-515-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-520-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-5200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-525-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-530-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-5300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-535-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-540-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-5400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-545-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  31K Aug 24 02:35 shuffle-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-550-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-5500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-555-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-560-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-5600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-565-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-570-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-5700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-575-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-580-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-5800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-585-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-590-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-5900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-595-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  30K Aug 24 02:35 shuffle-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-6000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-605-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-610-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Aug 24 02:35 shuffle-word-6100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-615-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-620-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-6200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-625-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-630-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-6300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-635-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-640-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-6400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-645-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  30K Aug 24 02:35 shuffle-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-650-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-6500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-655-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-660-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Aug 24 02:35 shuffle-word-6600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-665-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-670-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Aug 24 02:35 shuffle-word-6700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-675-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-680-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Aug 24 02:35 shuffle-word-6800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-685-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-690-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-6900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-695-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  31K Aug 24 02:35 shuffle-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-7000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-705-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-710-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-7100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-715-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-720-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-7200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-725-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-730-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Aug 24 02:35 shuffle-word-7300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-735-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-740-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-7400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-745-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  30K Aug 24 02:35 shuffle-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-750-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-7500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-755-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-760-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-7600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-765-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-770-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Aug 24 02:35 shuffle-word-7700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-775-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-780-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-7800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-785-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-790-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Aug 24 02:35 shuffle-word-7900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-795-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  29K Aug 24 02:35 shuffle-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Aug 24 02:35 shuffle-word-8000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-805-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-810-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-815-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-820-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-825-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-830-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-835-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-840-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-845-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  29K Aug 24 02:35 shuffle-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-850-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-855-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-860-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-865-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-870-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-875-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-880-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-885-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-890-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-895-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  29K Aug 24 02:35 shuffle-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-905-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-910-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-915-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-920-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-925-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-930-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-935-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-940-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-945-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-950-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-955-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-960-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-965-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  28K Aug 24 02:35 shuffle-word-970-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-975-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-980-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-985-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  26K Aug 24 02:35 shuffle-word-990-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27K Aug 24 02:35 shuffle-word-995-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 6.1K Aug 24 02:35 word-2-count.jsonl\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%script bash\n",
+    "\n",
+    "########################################\n",
+    "# Generate the required jsonl dataset\n",
+    "########################################\n",
+    "\n",
+    "# Reset the dataset dir\n",
+    "mkdir -p ../dataset\n",
+    "rm -rf ../dataset/*.jsonl\n",
+    "\n",
+    "# Generate the various datasets\n",
+    "echo \"## Generating word reptition dataset ##\"\n",
+    "\n",
+    "#\n",
+    "# We reduce the training set for up to 1000 words (2, then 5..1000 in steps of 5) - and shift the focus upwards\n",
+    "# (the older note of 50-100 token * 2 : ~100 - 250 token ctx len does not cover this range - TODO recompute)\n",
+    "#\n",
+    "python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-2-count.jsonl 2 50 &\n",
+    "for i in {5..1000..5} \n",
+    "do\n",
+    "    python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 50 & \n",
+    "    python ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 1 & \n",
+    "done\n",
+    "\n",
+    "#\n",
+    "# Ramping up the 1100 - 8000 words dataset (in steps of 100)\n",
+    "# \n",
+    "for i in {1100..8000..100} \n",
+    "do\n",
+    "    python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 2000 & \n",
+    "    python ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 20 & \n",
+    "done\n",
+    "\n",
+    "wait\n",
+    "echo \"## Done ##\"\n",
+    "\n",
+    "ls -lh ../dataset/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "c773c099",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:35:56.772968Z",
+     "iopub.status.busy": "2023-08-24T02:35:56.772665Z",
+     "iopub.status.idle": "2023-08-24T02:36:11.271527Z",
+     "shell.execute_reply": "2023-08-24T02:36:11.270822Z"
+    },
+    "papermill": {
+     "duration": 14.829894,
+     "end_time": "2023-08-24T02:36:11.273578",
+     "exception": false,
+     "start_time": "2023-08-24T02:35:56.443684",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.11/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/v5base-mem-template.yaml', '--trainer.logger.init_args.name=v5-hs2x-L6-D4096-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_2_offload)', '--trainer.strategy=deepspeed_stage_2_offload', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-hs2x-L6-D4096-E0_1-mem-ctx-8k/', '--model.lr_init=3e-4', '--model.lr_final=1e-4', '--data.max_token_size=8192', '--model.ctx_len=4096', '--model.bptt_learning_range=2', '--model.load_model=../model/v5-hs2x-L6-D4096-E0_1-mem-ctx-4k.pth'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/v5base-mem-template.yaml', '--trainer.logger.init_args.name=v5-hs2x-L6-D4096-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_2_offload)', '--trainer.strategy=deepspeed_stage_2_offload', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-hs2x-L6-D4096-E0_1-mem-ctx-8k/', '--model.lr_init=3e-4', '--model.lr_final=1e-4', '--data.max_token_size=8192', '--model.ctx_len=4096', '--model.bptt_learning_range=2', '--model.load_model=../model/v5-hs2x-L6-D4096-E0_1-mem-ctx-4k.pth'].\r\n",
+      "  rank_zero_warn(\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.11/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 3539354687\r\n",
+      "  rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n",
+      "Global seed set to 3539354687\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: wandb version 0.15.8 is available!  To upgrade, please run:\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m:  $ pip install wandb --upgrade\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.4\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20230824_023602-4orwfuq7\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mv5-hs2x-L6-D4096-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_2_offload)\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/4orwfuq7\u001b[0m\r\n",
+      "Traceback (most recent call last):\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/lightning_trainer.py\", line 254, in <module>\r\n",
+      "    cli_main()\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/lightning_trainer.py\", line 233, in cli_main\r\n",
+      "    LightningCLI(\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/lightning/pytorch/cli.py\", line 350, in __init__\r\n",
+      "    self.instantiate_classes()\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/lightning/pytorch/cli.py\", line 499, in instantiate_classes\r\n",
+      "    self.config_init = self.parser.instantiate_classes(self.config)\r\n",
+      "                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n",
+      "    cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n",
+      "          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_core.py\", line 1128, in instantiate_classes\r\n",
+      "    cfg[subcommand] = subparser.instantiate_classes(cfg[subcommand], instantiate_groups=instantiate_groups)\r\n",
+      "                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n",
+      "    cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n",
+      "          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_core.py\", line 1122, in instantiate_classes\r\n",
+      "    component.instantiate_class(component, cfg)\r\n",
+      "  File \"/usr/local/lib/python3.11/dist-packages/jsonargparse/_signatures.py\", line 551, in group_instantiate_class\r\n",
+      "    parent[key] = group.group_class(**value)\r\n",
+      "                  ^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/src/model.py\", line 553, in __init__\r\n",
+      "    raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n",
+      "ValueError: load_model file '../model/v5-hs2x-L6-D4096-E0_1-mem-ctx-4k.pth' does not exist\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5-hs2x-L6-D4096-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_2_offload)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/4orwfuq7\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230824_023602-4orwfuq7/logs\u001b[0m\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Start the finetune model training\n",
+    "!cd \"{TRAINER_DIR}\" && \\\n",
+    "    export WANDB_MODE=\"{WANDB_MODE}\" && \\\n",
+    "    export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
+    "    python lightning_trainer.py fit \\\n",
+    "        -c \"{NOTEBOOK_DIR}/v5base-mem-template.yaml\" \\\n",
+    "        --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Tune ctx-8k (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n",
+    "        --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n",
+    "        --trainer.devices=\"{GPU_DEVICES}\"  \\\n",
+    "        --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-mem-ctx-8k/\" \\\n",
+    "        --model.lr_init=3e-4 \\\n",
+    "        --model.lr_final=1e-4 \\\n",
+    "        --data.max_token_size=8192 \\\n",
+    "        --model.ctx_len=4096 \\\n",
+    "        --model.bptt_learning_range=2 \\\n",
+    "        --model.load_model=\"../model/{FILENAME_PREFIX}-mem-ctx-4k.pth\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "35b495ff",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:36:11.995388Z",
+     "iopub.status.busy": "2023-08-24T02:36:11.995120Z",
+     "iopub.status.idle": "2023-08-24T02:36:14.810485Z",
+     "shell.execute_reply": "2023-08-24T02:36:14.809779Z"
+    },
+    "papermill": {
+     "duration": 3.147068,
+     "end_time": "2023-08-24T02:36:14.812281",
+     "exception": false,
+     "start_time": "2023-08-24T02:36:11.665213",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Traceback (most recent call last):\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/export_checkpoint.py\", line 651, in <module>\r\n",
+      "    convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n",
+      "    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n",
+      "                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n",
+      "    raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n",
+      "ValueError: Unable to find 'latest' file at ../checkpoint/v5-hs2x-L6-D4096-E0_1-mem-ctx-8k/last.ckpt/latest\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ls: cannot access '../model/v5-hs2x-L6-D4096-E0_1-mem-ctx-8k.pth': No such file or directory\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Let's export the model from the checkpoint\n",
+    "!cd \"{TRAINER_DIR}\" && \\\n",
+    "    python export_checkpoint.py \\\n",
+    "        \"../checkpoint/{FILENAME_PREFIX}-mem-ctx-8k/last.ckpt\" \\\n",
+    "        \"../model/{FILENAME_PREFIX}-mem-ctx-8k.pth\" \"bf16\"\n",
+    "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-mem-ctx-8k.pth\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "id": "b894b210",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:36:15.524081Z",
+     "iopub.status.busy": "2023-08-24T02:36:15.523781Z",
+     "iopub.status.idle": "2023-08-24T02:36:20.608550Z",
+     "shell.execute_reply": "2023-08-24T02:36:20.607961Z"
+    },
+    "papermill": {
+     "duration": 5.47091,
+     "end_time": "2023-08-24T02:36:20.610038",
+     "exception": false,
+     "start_time": "2023-08-24T02:36:15.139128",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
+      "Traceback (most recent call last):\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/../memory_script/eval_v5_memory_guided.py\", line 366, in <module>\r\n",
+      "    asyncio.run(main_function())\r\n",
+      "  File \"/usr/lib/python3.11/asyncio/runners.py\", line 190, in run\r\n",
+      "    return runner.run(main)\r\n",
+      "           ^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/lib/python3.11/asyncio/runners.py\", line 118, in run\r\n",
+      "    return self._loop.run_until_complete(task)\r\n",
+      "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/lib/python3.11/asyncio/base_events.py\", line 653, in run_until_complete\r\n",
+      "    return future.result()\r\n",
+      "           ^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/../memory_script/eval_v5_memory_guided.py\", line 58, in main_function\r\n",
+      "    model = SimpleRWKV(model_path, device=\"cuda\")\r\n",
+      "            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 1378, in __init__\r\n",
+      "    self.model = RWKV(**model_config)\r\n",
+      "                 ^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 553, in __init__\r\n",
+      "    raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n",
+      "ValueError: load_model file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/model/v5-hs2x-L6-D4096-E0_1-mem-ctx-8k.pth' does not exist\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Let's do a quick memory test\n",
+    "!export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
+    "        python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-8k.pth\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "f44cffc9",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-24T02:36:21.266193Z",
+     "iopub.status.busy": "2023-08-24T02:36:21.265934Z",
+     "iopub.status.idle": "2023-08-24T02:36:26.256654Z",
+     "shell.execute_reply": "2023-08-24T02:36:26.255810Z"
+    },
+    "papermill": {
+     "duration": 5.321145,
+     "end_time": "2023-08-24T02:36:26.258238",
+     "exception": false,
+     "start_time": "2023-08-24T02:36:20.937093",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
+      "Traceback (most recent call last):\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/../memory_script/eval_v5_memory_guided.py\", line 366, in <module>\r\n",
+      "    asyncio.run(main_function())\r\n",
+      "  File \"/usr/lib/python3.11/asyncio/runners.py\", line 190, in run\r\n",
+      "    return runner.run(main)\r\n",
+      "           ^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/lib/python3.11/asyncio/runners.py\", line 118, in run\r\n",
+      "    return self._loop.run_until_complete(task)\r\n",
+      "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/usr/lib/python3.11/asyncio/base_events.py\", line 653, in run_until_complete\r\n",
+      "    return future.result()\r\n",
+      "           ^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/../memory_script/eval_v5_memory_guided.py\", line 58, in main_function\r\n",
+      "    model = SimpleRWKV(model_path, device=\"cuda\")\r\n",
+      "            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 1378, in __init__\r\n",
+      "    self.model = RWKV(**model_config)\r\n",
+      "                 ^^^^^^^^^^^^^^^^^^^^\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 553, in __init__\r\n",
+      "    raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n",
+      "ValueError: load_model file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/model/v5-hs2x-L6-D4096-E0_1-mem-ctx-8k.pth' does not exist\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "!export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
+    "        python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-8k.pth\" \"none\" 1000 4000"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  },
+  "papermill": {
+   "default_parameters": {},
+   "duration": 166.843506,
+   "end_time": "2023-08-24T02:36:26.864059",
+   "environment_variables": {},
+   "exception": null,
+   "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/v5-L6-D4096-E1e-1-ctx4k-part2.ipynb",
+   "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/v5-headsize2x/v5-L6-D4096-E1e-1-ctx4k-part2.ipynb",
+   "parameters": {},
+   "start_time": "2023-08-24T02:33:40.020553",
+   "version": "2.4.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
\ No newline at end of file