diff --git a/.gitattributes b/.gitattributes
index b6d19782bcbc7fe236d0826bfe059c5aa9f6ba2a..2e6af687414ba5d927742359c3837e0b8d27f1db 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -91,17 +91,3 @@ experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb filter=lfs
 experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/part1.ipynb filter=lfs diff=lfs merge=lfs -text
 experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage2.ipynb filter=lfs diff=lfs merge=lfs -text
 experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage2.ipynb filter=lfs diff=lfs merge=lfs -text
-experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb filter=lfs diff=lfs merge=lfs -text
-experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb filter=lfs diff=lfs merge=lfs -text
-experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage4.ipynb filter=lfs diff=lfs merge=lfs -text
-experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage4.ipynb filter=lfs diff=lfs merge=lfs -text
-experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage5.ipynb filter=lfs diff=lfs merge=lfs -text
-experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage5.ipynb filter=lfs diff=lfs merge=lfs -text
-experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb filter=lfs diff=lfs merge=lfs -text
-experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-baseline.ipynb filter=lfs diff=lfs merge=lfs -text
-experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-part1.ipynb filter=lfs diff=lfs merge=lfs -text
-experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-baseline.ipynb filter=lfs diff=lfs merge=lfs -text
-experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-overwrite-naive.ipynb filter=lfs diff=lfs merge=lfs -text
-experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-train.ipynb filter=lfs diff=lfs merge=lfs -text
-experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-expansion.ipynb filter=lfs diff=lfs merge=lfs -text
-experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-baseline.ipynb filter=lfs diff=lfs merge=lfs -text
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-baseline-p2.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-baseline-p2.pth
deleted file mode 100644
index df9e649b821c97a2e5912e7f73555d6ed6cb4133..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-baseline-p2.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c447bfd1844d0c3e536fb8824d029fd8b0e334e1368f807a4e85cd7099005130
-size 1721187285
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-baseline-p3.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-baseline-p3.pth
deleted file mode 100644
index 7e510b454e51f3e1360458d4aa4bc8f97480caba..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-baseline-p3.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:70b243f59685c4df841f16343bc7ff6947a3125cec5dabf9035b28b65c04da0e
-size 1721187285
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-enwiki-4k-p1.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-enwiki-4k-p1.pth
deleted file mode 100644
index af865daa0e29c34250db338e077056e58b7bba5e..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-enwiki-4k-p1.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a57c278ed7e7e2f9d7f0436540674bfa5178adcd04c3154f5d92992e0602c55b
-size 1721187621
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-neox-v5base-init.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-neox-v5base-init.pth
deleted file mode 100644
index 613ae00e6b94d2f54a518f1072636f21e570c85e..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-neox-v5base-init.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:06105d96413046fce0ec189b9c4685a813cfa7147300851c5d2afc7b5adbcb38
-size 1721189797
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-baseline.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-baseline.ipynb
deleted file mode 100644
index 04f75fe75d5a993aa7050629019cd0e3cf72c508..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-baseline.ipynb
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:31f61ce42e82d9a475446458ed015a190f16dd9b2b17bd67f4feedd9f72750ad
-size 16577145
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-part1.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-part1.ipynb
deleted file mode 100644
index 2caac2060cc81dbfc7e4840004960eeed06d0e29..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-part1.ipynb
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b53c27ed2c20b9f1f690647a83c0fbe2ce09594518b9ec557f515a4f8b548f2b
-size 15941299
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-baseline-p3.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-baseline-p3.pth
deleted file mode 100644
index 2f736f75a1664aecb04c1e0fe217b71a77aecbf5..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-baseline-p3.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c380bcd4b861a8af263fd56dc6e183b9e06ba0bc8f9895c4dcd8a678b58296e8
-size 1721187621
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-layer-expansion-p1.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-layer-expansion-p1.pth
deleted file mode 100644
index 7f96f25f0aac70ea5a9c88a5208d0c071bd9fee9..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-layer-expansion-p1.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:89f8caf661887bdba1897a10009f033331c552bfb763112e6da1b850d8ec3ff7
-size 1721189525
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-layer-expansion-p2.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-layer-expansion-p2.pth
deleted file mode 100644
index 15f17b01a2826359ce6ac3f3bea9b310b2b596e3..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-layer-expansion-p2.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2680e091197e798686c97bdd2af0f6827f2b29c648cc1ae03f67d6f094859618
-size 1721189525
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-overwrite-naive-p1.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-overwrite-naive-p1.pth
deleted file mode 100644
index 8bb49e3d9132afcd95cecfa46932131d2971c1e2..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-overwrite-naive-p1.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:35a5d7571d90160edc20ce95abfdbcb6109ad47eccdefe8051bd8f15d12bf326
-size 1721189525
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-overwrite-naive-p2.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-overwrite-naive-p2.pth
deleted file mode 100644
index ac0f774b491b1af1dec3e65871dcb2618a295104..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-overwrite-naive-p2.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f6b50bf05f191da87a6a17072d485d4059a4ded1335605e6b7bb8e9f2648d966
-size 1721189525
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb
deleted file mode 100644
index ae3b85f4a8acf6c3001f445ade22f015a8d52327..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb
+++ /dev/null
@@ -1,2461 +0,0 @@
-{
- "cells": [
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "id": "d3126ef2",
-   "metadata": {
-    "papermill": {
-     "duration": 0.004879,
-     "end_time": "2023-10-11T08:02:23.608034",
-     "exception": false,
-     "start_time": "2023-10-11T08:02:23.603155",
-     "status": "completed"
-    },
-    "tags": []
-   },
-   "source": [
-    "# RWKV v5 multi-size training experiment\n",
-    "\n",
-    "**Note:** This project assumes you have the rwkv-infctx conda env setup"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "id": "986070aa",
-   "metadata": {
-    "papermill": {
-     "duration": 0.002523,
-     "end_time": "2023-10-11T08:02:23.613605",
-     "exception": false,
-     "start_time": "2023-10-11T08:02:23.611082",
-     "status": "completed"
-    },
-    "tags": []
-   },
-   "source": [
-    "# Basic Setup"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "dc924c7f",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-10-11T08:02:23.620990Z",
-     "iopub.status.busy": "2023-10-11T08:02:23.620432Z",
-     "iopub.status.idle": "2023-10-11T08:02:24.379549Z",
-     "shell.execute_reply": "2023-10-11T08:02:24.378580Z"
-    },
-    "papermill": {
-     "duration": 0.765369,
-     "end_time": "2023-10-11T08:02:24.381741",
-     "exception": false,
-     "start_time": "2023-10-11T08:02:23.616372",
-     "status": "completed"
-    },
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "# First lets setup the various directories, and init the model\n",
-    "!mkdir -p ../../../../model/\n",
-    "!mkdir -p ../../../../datapath/\n",
-    "!mkdir -p ../../../../checkpoint/"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "2bbc32ac",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-10-11T08:02:24.389788Z",
-     "iopub.status.busy": "2023-10-11T08:02:24.389227Z",
-     "iopub.status.idle": "2023-10-11T08:02:24.398441Z",
-     "shell.execute_reply": "2023-10-11T08:02:24.397578Z"
-    },
-    "papermill": {
-     "duration": 0.015548,
-     "end_time": "2023-10-11T08:02:24.400362",
-     "exception": false,
-     "start_time": "2023-10-11T08:02:24.384814",
-     "status": "completed"
-    },
-    "tags": []
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "DEEPSPEED_STRAT: deepspeed_stage_2_offload\n",
-      "ENABLE_WANDB: True\n",
-      "GPU_DEVICES: auto\n",
-      "NOTEBOOK_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/multi-size-train\n",
-      "INFERENCE_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n",
-      "TRAINER_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n",
-      "PROJECT_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer\n"
-     ]
-    }
-   ],
-   "source": [
-    "DEEPSPEED_STRAT=\"deepspeed_stage_2_offload\"\n",
-    "GPU_DEVICES=\"auto\"\n",
-    "ENABLE_WANDB=True\n",
-    "\n",
-    "EMBED_SCALE=0.01\n",
-    "EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(\".\", \"_\")\n",
-    "\n",
-    "EMBED_SIZE=2048\n",
-    "\n",
-    "WANDB_PREFIX=f\"[Multi-size] v5-L6+6-D{EMBED_SIZE}-E{EMBED_SCALE}\"\n",
-    "FILENAME_PREFIX=f\"v5-L6+6-D{EMBED_SIZE}-E{EMBED_SCALE_LABEL}\"\n",
-    "\n",
-    "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n",
-    "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n",
-    "print(\"GPU_DEVICES:\", GPU_DEVICES)\n",
-    "\n",
-    "if ENABLE_WANDB:\n",
-    "    WANDB_MODE=\"online\"\n",
-    "else:\n",
-    "    WANDB_MODE=\"disabled\"\n",
-    "\n",
-    "# Computing the notebook, and various paths\n",
-    "import os\n",
-    "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n",
-    "PROJECT_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../../../../\"))\n",
-    "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n",
-    "INFERENCE_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n",
-    "\n",
-    "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n",
-    "print(\"INFERENCE_DIR:\", INFERENCE_DIR)\n",
-    "print(\"TRAINER_DIR:\", TRAINER_DIR)\n",
-    "print(\"PROJECT_DIR:\", PROJECT_DIR)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "ffa69634",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-10-11T08:02:24.408311Z",
-     "iopub.status.busy": "2023-10-11T08:02:24.407798Z",
-     "iopub.status.idle": "2023-10-11T08:03:19.634663Z",
-     "shell.execute_reply": "2023-10-11T08:03:19.633765Z"
-    },
-    "papermill": {
-     "duration": 55.233419,
-     "end_time": "2023-10-11T08:03:19.636895",
-     "exception": false,
-     "start_time": "2023-10-11T08:02:24.403476",
-     "status": "completed"
-    },
-    "tags": []
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "--2023-10-11 08:02:24--  https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/0600b94a58219f658326b4792ef5cd020e9d1a43/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2a.pth\r\n",
-      "Resolving huggingface.co (huggingface.co)... 18.154.227.87, 18.154.227.7, 18.154.227.69, ...\r\n",
-      "Connecting to huggingface.co (huggingface.co)|18.154.227.87|:443... connected.\r\n",
-      "HTTP request sent, awaiting response... 302 Found\r\n",
-      "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/2f52085cee9c3db4bb079dc44edf50b0a19c170bd92128e918e6203efef83cea?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L6-D2048-E0_01-split-2a.pth%3B+filename%3D%22v5-L6-D2048-E0_01-split-2a.pth%22%3B&Expires=1697270544&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NzI3MDU0NH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzJmNTIwODVjZWU5YzNkYjRiYjA3OWRjNDRlZGY1MGIwYTE5YzE3MGJkOTIxMjhlOTE4ZTYyMDNlZmVmODNjZWE%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=AW451jyDioqxesXvDVp%7EgfYV3uhgFTDwTn3SlZa-gk-yCDb7c-QR44rTm9sWCGSJjaa%7EvJvj9zLGUK7fvbr%7E%7EGQJgL2L%7Es9vkVPg8qs1k%7EtCh-MX%7E45bxo4CapTIo8fx4xLJ738Tks8uzpx3Sy9hWbfuGQFCUwBHzJXG5uGNRzPv87Zdfy4gIIAt0NytaC3bFmKZl4DbXLF4%7EtVWXED7H3NAlBvGETdhjzK5Qr0FLZB2vqC1LQpPTexdTH-ETkPEIQpXRBV-JctzaKBfI1Da-tGpt4JdPlhyPIu1kaNtX13yTibuBrT-mDOy6OVJZ9Zsj%7EHdVUtDrdp-I01dhylHpQ__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n",
-      "--2023-10-11 08:02:24--  https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/2f52085cee9c3db4bb079dc44edf50b0a19c170bd92128e918e6203efef83cea?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L6-D2048-E0_01-split-2a.pth%3B+filename%3D%22v5-L6-D2048-E0_01-split-2a.pth%22%3B&Expires=1697270544&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NzI3MDU0NH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzJmNTIwODVjZWU5YzNkYjRiYjA3OWRjNDRlZGY1MGIwYTE5YzE3MGJkOTIxMjhlOTE4ZTYyMDNlZmVmODNjZWE%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=AW451jyDioqxesXvDVp%7EgfYV3uhgFTDwTn3SlZa-gk-yCDb7c-QR44rTm9sWCGSJjaa%7EvJvj9zLGUK7fvbr%7E%7EGQJgL2L%7Es9vkVPg8qs1k%7EtCh-MX%7E45bxo4CapTIo8fx4xLJ738Tks8uzpx3Sy9hWbfuGQFCUwBHzJXG5uGNRzPv87Zdfy4gIIAt0NytaC3bFmKZl4DbXLF4%7EtVWXED7H3NAlBvGETdhjzK5Qr0FLZB2vqC1LQpPTexdTH-ETkPEIQpXRBV-JctzaKBfI1Da-tGpt4JdPlhyPIu1kaNtX13yTibuBrT-mDOy6OVJZ9Zsj%7EHdVUtDrdp-I01dhylHpQ__&Key-Pair-Id=KVTP0A1DKRTAX\r\n",
-      "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... 3.162.112.69, 3.162.112.2, 3.162.112.100, ...\r\n",
-      "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|3.162.112.69|:443... connected.\r\n",
-      "HTTP request sent, awaiting response... "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "200 OK\r\n",
-      "Length: 1066536657 (1017M) [binary/octet-stream]\r\n",
-      "Saving to: ‘v5-L6-D2048-E0_01-split-2a.pth’\r\n",
-      "\r\n",
-      "\r",
-      "          v5-L6-D20   0%[                    ]       0  --.-KB/s               "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "         v5-L6-D204   1%[                    ]  15.26M  42.9MB/s               "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "        v5-L6-D2048   3%[                    ]  30.52M  47.7MB/s               "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "       v5-L6-D2048-   4%[                    ]  45.26M  51.5MB/s               "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "      v5-L6-D2048-E   5%[>                   ]  59.20M  52.1MB/s               "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "     v5-L6-D2048-E0   6%[>                   ]  65.20M  48.8MB/s               "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "    v5-L6-D2048-E0_   7%[>                   ]  76.29M  44.4MB/s               "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "   v5-L6-D2048-E0_0   8%[>                   ]  91.03M  47.2MB/s               "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "  v5-L6-D2048-E0_01   9%[>                   ]  91.55M  43.0MB/s               "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      " v5-L6-D2048-E0_01-  10%[=>                  ] 106.81M  43.2MB/s               "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "v5-L6-D2048-E0_01-s  11%[=>                  ] 120.25M  43.9MB/s               "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "5-L6-D2048-E0_01-sp  12%[=>                  ] 122.07M  41.4MB/s               "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "-L6-D2048-E0_01-spl  13%[=>                  ] 136.81M  42.5MB/s    eta 21s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "L6-D2048-E0_01-spli  14%[=>                  ] 152.07M  42.8MB/s    eta 21s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "6-D2048-E0_01-split  15%[==>                 ] 152.72M  40.5MB/s    eta 21s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "-D2048-E0_01-split-  16%[==>                 ] 167.85M  41.5MB/s    eta 21s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "D2048-E0_01-split-2  18%[==>                 ] 183.10M  43.0MB/s    eta 19s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "2048-E0_01-split-2a  19%[==>                 ] 198.36M  43.4MB/s    eta 19s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "048-E0_01-split-2a.  20%[===>                ] 213.11M  44.1MB/s    eta 19s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "48-E0_01-split-2a.p  22%[===>                ] 228.36M  43.3MB/s    eta 19s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "8-E0_01-split-2a.pt  22%[===>                ] 228.87M  41.1MB/s    eta 18s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "-E0_01-split-2a.pth  24%[===>                ] 244.13M  41.0MB/s    eta 18s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "E0_01-split-2a.pth   25%[====>               ] 259.40M  42.4MB/s    eta 18s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "0_01-split-2a.pth    26%[====>               ] 272.83M  40.4MB/s    eta 18s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "_01-split-2a.pth     28%[====>               ] 289.40M  41.4MB/s    eta 18s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "01-split-2a.pth      28%[====>               ] 289.92M  37.9MB/s    eta 18s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "1-split-2a.pth       29%[====>               ] 304.66M  36.1MB/s    eta 19s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "-split-2a.pth        30%[=====>              ] 305.18M  33.4MB/s    eta 19s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "split-2a.pth         31%[=====>              ] 318.60M  33.2MB/s    eta 19s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "plit-2a.pth          31%[=====>              ] 320.29M  33.3MB/s    eta 19s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "lit-2a.pth           31%[=====>              ] 320.57M  30.7MB/s    eta 19s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "it-2a.pth            32%[=====>              ] 335.18M  30.1MB/s    eta 19s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "t-2a.pth             33%[=====>              ] 345.53M  31.2MB/s    eta 19s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "-2a.pth              34%[=====>              ] 350.82M  29.7MB/s    eta 19s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "2a.pth               35%[======>             ] 360.98M  31.3MB/s    eta 19s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "a.pth                36%[======>             ] 366.20M  29.6MB/s    eta 19s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      ".pth                 37%[======>             ] 380.96M  30.8MB/s    eta 17s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "pth                  38%[======>             ] 392.79M  32.0MB/s    eta 17s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "th                   39%[======>             ] 396.73M  29.1MB/s    eta 17s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "h                    40%[=======>            ] 411.99M  29.1MB/s    eta 17s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "                     41%[=======>            ] 426.73M  28.7MB/s    eta 16s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "                  v  42%[=======>            ] 427.25M  29.1MB/s    eta 16s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "                 v5  42%[=======>            ] 435.25M  27.9MB/s    eta 16s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "                v5-  43%[=======>            ] 438.04M  28.2MB/s    eta 16s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "               v5-L  43%[=======>            ] 442.05M  29.7MB/s    eta 16s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "              v5-L6  43%[=======>            ] 446.00M  31.1MB/s    eta 16s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "             v5-L6-  44%[=======>            ] 457.24M  33.9MB/s    eta 16s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "            v5-L6-D  45%[========>           ] 457.89M  31.6MB/s    eta 16s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "           v5-L6-D2  46%[========>           ] 473.02M  34.8MB/s    eta 16s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "          v5-L6-D20  48%[========>           ] 488.28M  34.1MB/s    eta 15s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "         v5-L6-D204  49%[========>           ] 503.03M  34.6MB/s    eta 15s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "        v5-L6-D2048  50%[=========>          ] 518.29M  37.3MB/s    eta 15s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "       v5-L6-D2048-  51%[=========>          ] 525.10M  35.8MB/s    eta 15s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "      v5-L6-D2048-E  52%[=========>          ] 534.05M  34.4MB/s    eta 13s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "     v5-L6-D2048-E0  53%[=========>          ] 548.80M  34.4MB/s    eta 13s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "    v5-L6-D2048-E0_  55%[==========>         ] 562.75M  33.8MB/s    eta 13s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "   v5-L6-D2048-E0_0  56%[==========>         ] 579.31M  36.0MB/s    eta 13s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "  v5-L6-D2048-E0_01  57%[==========>         ] 581.49M  36.7MB/s    eta 12s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      " v5-L6-D2048-E0_01-  58%[==========>         ] 592.93M  37.4MB/s    eta 12s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "v5-L6-D2048-E0_01-s  58%[==========>         ] 595.09M  37.1MB/s    eta 12s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "5-L6-D2048-E0_01-sp  60%[===========>        ] 610.35M  38.5MB/s    eta 12s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "-L6-D2048-E0_01-spl  61%[===========>        ] 625.61M  38.7MB/s    eta 11s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "L6-D2048-E0_01-spli  62%[===========>        ] 640.36M  39.9MB/s    eta 11s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "6-D2048-E0_01-split  64%[===========>        ] 653.30M  39.5MB/s    eta 11s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "-D2048-E0_01-split-  64%[===========>        ] 656.13M  38.5MB/s    eta 11s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "D2048-E0_01-split-2  66%[============>       ] 671.38M  38.9MB/s    eta 9s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "2048-E0_01-split-2a  67%[============>       ] 685.57M  39.7MB/s    eta 9s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "048-E0_01-split-2a.  67%[============>       ] 686.64M  37.5MB/s    eta 9s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "48-E0_01-split-2a.p  68%[============>       ] 701.39M  37.9MB/s    eta 9s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "8-E0_01-split-2a.pt  69%[============>       ] 708.59M  38.8MB/s    eta 8s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "-E0_01-split-2a.pth  70%[=============>      ] 715.34M  38.2MB/s    eta 8s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "E0_01-split-2a.pth   71%[=============>      ] 731.91M  40.7MB/s    eta 8s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "0_01-split-2a.pth    73%[=============>      ] 747.17M  38.0MB/s    eta 8s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "_01-split-2a.pth     73%[=============>      ] 747.75M  38.0MB/s    eta 7s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "01-split-2a.pth      74%[=============>      ] 762.42M  40.2MB/s    eta 7s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "1-split-2a.pth       75%[==============>     ] 762.94M  37.2MB/s    eta 7s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "-split-2a.pth        76%[==============>     ] 776.37M  36.7MB/s    eta 7s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "split-2a.pth         76%[==============>     ] 778.20M  34.9MB/s    eta 7s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "plit-2a.pth          77%[==============>     ] 791.63M  38.1MB/s    eta 7s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "lit-2a.pth           78%[==============>     ] 793.46M  36.0MB/s    eta 7s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "it-2a.pth            79%[==============>     ] 808.20M  38.6MB/s    eta 7s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "t-2a.pth             80%[===============>    ] 816.07M  36.7MB/s    eta 7s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "-2a.pth              81%[===============>    ] 823.97M  34.7MB/s    eta 5s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "2a.pth               82%[===============>    ] 837.41M  36.4MB/s    eta 5s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "a.pth                83%[===============>    ] 853.98M  38.3MB/s    eta 5s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      ".pth                 85%[================>   ] 867.67M  38.4MB/s    eta 5s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "pth                  85%[================>   ] 873.17M  39.1MB/s    eta 5s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "th                   87%[================>   ] 885.01M  36.1MB/s    eta 4s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "h                    88%[================>   ] 899.75M  37.6MB/s    eta 4s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "                     88%[================>   ] 900.40M  34.6MB/s    eta 4s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "                  v  90%[=================>  ] 915.53M  35.4MB/s    eta 4s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "                 v5  91%[=================>  ] 930.78M  37.6MB/s    eta 2s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "                v5-  92%[=================>  ] 945.53M  40.9MB/s    eta 2s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "               v5-L  93%[=================>  ] 946.04M  37.6MB/s    eta 2s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "              v5-L6  94%[=================>  ] 959.48M  38.0MB/s    eta 2s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "             v5-L6-  94%[=================>  ] 961.30M  33.1MB/s    eta 2s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "            v5-L6-D  95%[==================> ] 976.05M  34.9MB/s    eta 2s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "           v5-L6-D2  97%[==================> ] 991.31M  34.8MB/s    eta 2s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "          v5-L6-D20  97%[==================> ] 992.94M  35.0MB/s    eta 2s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "         v5-L6-D204  98%[==================> ]   1005M  34.7MB/s    eta 0s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "        v5-L6-D2048  99%[==================> ]   1016M  33.7MB/s    eta 0s     \r",
-      "v5-L6-D2048-E0_01-s 100%[===================>]   1017M  33.9MB/s    in 28s     \r\n",
-      "\r\n",
-      "2023-10-11 08:02:52 (36.4 MB/s) - ‘v5-L6-D2048-E0_01-split-2a.pth’ saved [1066536657/1066536657]\r\n",
-      "\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "--2023-10-11 08:02:53--  https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/0600b94a58219f658326b4792ef5cd020e9d1a43/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2b.pth\r\n",
-      "Resolving huggingface.co (huggingface.co)... 18.154.227.67, 18.154.227.69, 18.154.227.7, ...\r\n",
-      "Connecting to huggingface.co (huggingface.co)|18.154.227.67|:443... connected.\r\n",
-      "HTTP request sent, awaiting response... 302 Found\r\n",
-      "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/6b64a1018631b9ddd15a746002bab3eafe956dced78a91af7abcdadaae4a7b25?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L6-D2048-E0_01-split-2b.pth%3B+filename%3D%22v5-L6-D2048-E0_01-split-2b.pth%22%3B&Expires=1697270573&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NzI3MDU3M319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzZiNjRhMTAxODYzMWI5ZGRkMTVhNzQ2MDAyYmFiM2VhZmU5NTZkY2VkNzhhOTFhZjdhYmNkYWRhYWU0YTdiMjU%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=WkKE1KjbKeVQp4dWdBuAAbOfx2JJs%7EDJaKbx8gRQSGABLfGDhkq2L8Q9KZ1fg1v%7E74c0Mkrbvop33pAwQDh782jzEiogbDb8HXSO7AtIYQqvI6K-fmb%7EpxQPFrmypJwWhQj9ePRZX2KSL6LcqN1X0GAheI-PQENpVH3svxhhib2-fYDmuvnpGX7pc6n36GES6lvwOuCQOxfIhlFnIiuNEU00NaBdDiaXb-uteXhSkKO-1EFCM0fBtwT5hVkdHZQG2m6iMcI2KaN0AHV%7EvF838f4DM%7ERbjVkRgwphRaYZxmJxUKZxGTV7rRJjIQA%7EOlnPllE1dSdwJ7y0ULOIKQHYUQ__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n",
-      "--2023-10-11 08:02:53--  https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/6b64a1018631b9ddd15a746002bab3eafe956dced78a91af7abcdadaae4a7b25?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L6-D2048-E0_01-split-2b.pth%3B+filename%3D%22v5-L6-D2048-E0_01-split-2b.pth%22%3B&Expires=1697270573&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NzI3MDU3M319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzZiNjRhMTAxODYzMWI5ZGRkMTVhNzQ2MDAyYmFiM2VhZmU5NTZkY2VkNzhhOTFhZjdhYmNkYWRhYWU0YTdiMjU%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=WkKE1KjbKeVQp4dWdBuAAbOfx2JJs%7EDJaKbx8gRQSGABLfGDhkq2L8Q9KZ1fg1v%7E74c0Mkrbvop33pAwQDh782jzEiogbDb8HXSO7AtIYQqvI6K-fmb%7EpxQPFrmypJwWhQj9ePRZX2KSL6LcqN1X0GAheI-PQENpVH3svxhhib2-fYDmuvnpGX7pc6n36GES6lvwOuCQOxfIhlFnIiuNEU00NaBdDiaXb-uteXhSkKO-1EFCM0fBtwT5hVkdHZQG2m6iMcI2KaN0AHV%7EvF838f4DM%7ERbjVkRgwphRaYZxmJxUKZxGTV7rRJjIQA%7EOlnPllE1dSdwJ7y0ULOIKQHYUQ__&Key-Pair-Id=KVTP0A1DKRTAX\r\n",
-      "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... 3.162.112.95, 3.162.112.100, 3.162.112.2, ...\r\n",
-      "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|3.162.112.95|:443... connected.\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "HTTP request sent, awaiting response... "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "200 OK\r\n",
-      "Length: 1066536657 (1017M) [binary/octet-stream]\r\n",
-      "Saving to: ‘v5-L6-D2048-E0_01-split-2b.pth’\r\n",
-      "\r\n",
-      "\r",
-      "          v5-L6-D20   0%[                    ]       0  --.-KB/s               "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "         v5-L6-D204   1%[                    ]  14.74M  67.8MB/s               "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "        v5-L6-D2048   2%[                    ]  28.69M  63.1MB/s               "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "       v5-L6-D2048-   3%[                    ]  30.52M  42.3MB/s               "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "      v5-L6-D2048-E   4%[                    ]  45.26M  45.2MB/s               "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "     v5-L6-D2048-E0   4%[                    ]  45.78M  37.9MB/s               "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "    v5-L6-D2048-E0_   6%[>                   ]  61.03M  41.2MB/s               "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "   v5-L6-D2048-E0_0   7%[>                   ]  75.78M  45.0MB/s               "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "  v5-L6-D2048-E0_01   8%[>                   ]  85.94M  45.6MB/s               "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      " v5-L6-D2048-E0_01-   9%[>                   ]  91.55M  40.8MB/s               "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "v5-L6-D2048-E0_01-s  10%[=>                  ] 106.81M  40.5MB/s               "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "5-L6-D2048-E0_01-sp  12%[=>                  ] 122.07M  40.2MB/s    eta 22s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "-L6-D2048-E0_01-spl  13%[=>                  ] 137.33M  41.7MB/s    eta 22s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "L6-D2048-E0_01-spli  14%[=>                  ] 152.07M  42.9MB/s    eta 22s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "6-D2048-E0_01-split  16%[==>                 ] 167.33M  43.6MB/s    eta 22s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "-D2048-E0_01-split-  17%[==>                 ] 181.32M  44.9MB/s    eta 19s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "D2048-E0_01-split-2  18%[==>                 ] 183.10M  41.7MB/s    eta 19s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "2048-E0_01-split-2b  19%[==>                 ] 196.53M  41.8MB/s    eta 19s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "048-E0_01-split-2b.  19%[==>                 ] 198.36M  39.0MB/s    eta 19s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "48-E0_01-split-2b.p  20%[===>                ] 213.11M  39.0MB/s    eta 20s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "8-E0_01-split-2b.pt  21%[===>                ] 220.29M  40.8MB/s    eta 20s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "-E0_01-split-2b.pth  22%[===>                ] 228.36M  39.9MB/s    eta 20s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "E0_01-split-2b.pth   24%[===>                ] 244.13M  40.3MB/s    eta 20s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "0_01-split-2b.pth    25%[====>               ] 259.40M  40.4MB/s    eta 18s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "_01-split-2b.pth     26%[====>               ] 274.14M  42.1MB/s    eta 18s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "01-split-2b.pth      27%[====>               ] 274.66M  38.5MB/s    eta 18s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "1-split-2b.pth       28%[====>               ] 289.92M  41.6MB/s    eta 18s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "-split-2b.pth        30%[=====>              ] 305.18M  41.6MB/s    eta 17s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "split-2b.pth         31%[=====>              ] 320.43M  40.9MB/s    eta 17s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "plit-2b.pth          32%[=====>              ] 335.18M  41.2MB/s    eta 17s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "lit-2b.pth           33%[=====>              ] 335.69M  38.5MB/s    eta 17s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "it-2b.pth            34%[=====>              ] 350.95M  38.6MB/s    eta 16s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "t-2b.pth             35%[======>             ] 365.70M  40.9MB/s    eta 16s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "-2b.pth              36%[======>             ] 366.20M  38.0MB/s    eta 16s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "2b.pth               37%[======>             ] 381.47M  38.8MB/s    eta 16s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "b.pth                37%[======>             ] 385.65M  39.1MB/s    eta 16s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      ".pth                 39%[======>             ] 396.73M  36.3MB/s    eta 16s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "pth                  39%[======>             ] 406.75M  37.5MB/s    eta 16s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "th                   40%[=======>            ] 411.99M  33.0MB/s    eta 16s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "h                    42%[=======>            ] 427.25M  33.6MB/s    eta 15s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "                     43%[=======>            ] 441.98M  32.7MB/s    eta 15s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "                  v  43%[=======>            ] 442.51M  32.5MB/s    eta 15s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "                 v5  44%[=======>            ] 457.25M  32.2MB/s    eta 15s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "                v5-  45%[========>           ] 457.76M  32.2MB/s    eta 15s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "               v5-L  46%[========>           ] 472.50M  31.5MB/s    eta 14s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "              v5-L6  46%[========>           ] 473.02M  31.5MB/s    eta 14s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "             v5-L6-  48%[========>           ] 488.28M  30.9MB/s    eta 14s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "            v5-L6-D  49%[========>           ] 503.54M  33.6MB/s    eta 14s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "           v5-L6-D2  50%[=========>          ] 518.29M  34.2MB/s    eta 14s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "          v5-L6-D20  51%[=========>          ] 518.80M  34.8MB/s    eta 13s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "         v5-L6-D204  52%[=========>          ] 534.05M  34.2MB/s    eta 13s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "        v5-L6-D2048  54%[=========>          ] 549.31M  37.5MB/s    eta 13s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "       v5-L6-D2048-  55%[==========>         ] 564.06M  37.7MB/s    eta 13s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "      v5-L6-D2048-E  55%[==========>         ] 565.78M  37.7MB/s    eta 12s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "     v5-L6-D2048-E0  57%[==========>         ] 579.83M  37.8MB/s    eta 12s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "    v5-L6-D2048-E0_  58%[==========>         ] 595.09M  39.7MB/s    eta 12s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "   v5-L6-D2048-E0_0  60%[===========>        ] 610.35M  40.9MB/s    eta 12s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "  v5-L6-D2048-E0_01  61%[===========>        ] 625.47M  44.1MB/s    eta 10s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      " v5-L6-D2048-E0_01-  61%[===========>        ] 629.82M  42.6MB/s    eta 10s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "v5-L6-D2048-E0_01-s  63%[===========>        ] 640.87M  42.6MB/s    eta 10s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "5-L6-D2048-E0_01-sp  64%[===========>        ] 656.13M  45.6MB/s    eta 10s    "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "-L6-D2048-E0_01-spl  66%[============>       ] 671.38M  45.8MB/s    eta 9s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "L6-D2048-E0_01-spli  67%[============>       ] 686.64M  47.1MB/s    eta 9s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "6-D2048-E0_01-split  69%[============>       ] 701.90M  47.0MB/s    eta 9s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "-D2048-E0_01-split-  70%[=============>      ] 717.16M  46.9MB/s    eta 9s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "D2048-E0_01-split-2  71%[=============>      ] 730.60M  47.8MB/s    eta 7s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "2048-E0_01-split-2b  73%[=============>      ] 747.17M  45.9MB/s    eta 7s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "048-E0_01-split-2b.  74%[=============>      ] 755.98M  45.7MB/s    eta 7s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "48-E0_01-split-2b.p  75%[==============>     ] 762.94M  43.2MB/s    eta 6s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "8-E0_01-split-2b.pt  76%[==============>     ] 777.68M  45.4MB/s    eta 6s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "-E0_01-split-2b.pth  76%[==============>     ] 778.32M  42.7MB/s    eta 6s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "E0_01-split-2b.pth   78%[==============>     ] 793.46M  42.0MB/s    eta 6s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "0_01-split-2b.pth    79%[==============>     ] 808.20M  41.7MB/s    eta 5s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "_01-split-2b.pth     80%[===============>    ] 814.09M  42.3MB/s    eta 5s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "01-split-2b.pth      80%[===============>    ] 823.46M  41.0MB/s    eta 5s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "1-split-2b.pth       81%[===============>    ] 823.97M  40.5MB/s    eta 5s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "-split-2b.pth        82%[===============>    ] 838.71M  38.6MB/s    eta 4s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "split-2b.pth         83%[===============>    ] 853.98M  41.4MB/s    eta 4s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "plit-2b.pth          84%[===============>    ] 854.61M  38.0MB/s    eta 4s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "lit-2b.pth           85%[================>   ] 869.24M  35.7MB/s    eta 4s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "it-2b.pth            85%[================>   ] 869.75M  35.3MB/s    eta 4s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "t-2b.pth             86%[================>   ] 875.74M  34.3MB/s    eta 4s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "-2b.pth              87%[================>   ] 885.01M  32.5MB/s    eta 4s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "2b.pth               88%[================>   ] 900.27M  33.8MB/s    eta 4s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "b.pth                89%[================>   ] 913.70M  34.5MB/s    eta 3s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      ".pth                 90%[=================>  ] 924.21M  34.8MB/s    eta 3s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "pth                  91%[=================>  ] 930.27M  35.3MB/s    eta 3s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "th                   93%[=================>  ] 946.04M  34.9MB/s    eta 3s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "h                    94%[=================>  ] 961.30M  37.2MB/s    eta 1s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "                     95%[==================> ] 970.14M  35.7MB/s    eta 1s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "                  v  96%[==================> ] 976.55M  34.9MB/s    eta 1s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "                 v5  97%[==================> ] 991.82M  37.0MB/s    eta 1s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "                v5-  98%[==================> ] 998.13M  35.6MB/s    eta 1s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "               v5-L  98%[==================> ]   1007M  37.2MB/s    eta 0s     "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "              v5-L6  99%[==================> ]   1016M  34.9MB/s    eta 0s     \r",
-      "v5-L6-D2048-E0_01-s 100%[===================>]   1017M  35.1MB/s    in 26s     \r\n",
-      "\r\n",
-      "2023-10-11 08:03:19 (38.9 MB/s) - ‘v5-L6-D2048-E0_01-split-2b.pth’ saved [1066536657/1066536657]\r\n",
-      "\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Get the init split model, and finetune from there\n",
-    "!cd \"{PROJECT_DIR}/model/\" && wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/0600b94a58219f658326b4792ef5cd020e9d1a43/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2a.pth\"\n",
-    "!cd \"{PROJECT_DIR}/model/\" && wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/0600b94a58219f658326b4792ef5cd020e9d1a43/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2b.pth\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "2a3cd2d1",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-10-11T08:03:19.666619Z",
-     "iopub.status.busy": "2023-10-11T08:03:19.665958Z",
-     "iopub.status.idle": "2023-10-11T08:03:29.305787Z",
-     "shell.execute_reply": "2023-10-11T08:03:29.304873Z"
-    },
-    "papermill": {
-     "duration": 9.658186,
-     "end_time": "2023-10-11T08:03:29.308744",
-     "exception": false,
-     "start_time": "2023-10-11T08:03:19.650558",
-     "status": "completed"
-    },
-    "tags": []
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (0/2 shards):   0%|         | 0/27200 [00:00<?, ? examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (0/2 shards):   7%| | 2000/27200 [00:00<00:01, 16356.85 examp"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (0/2 shards):  15%|▏| 4000/27200 [00:00<00:01, 17283.77 examp"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (0/2 shards):  22%|▏| 6000/27200 [00:00<00:01, 17873.97 examp"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (0/2 shards):  29%|▎| 8000/27200 [00:00<00:01, 18442.59 examp"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (0/2 shards):  40%|▍| 11000/27200 [00:00<00:00, 19135.78 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (0/2 shards):  50%|▌| 13600/27200 [00:00<00:00, 19543.92 exam\r",
-      "Saving the dataset (1/2 shards):  50%|▌| 13600/27200 [00:00<00:00, 19543.92 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (1/2 shards):  65%|▋| 17600/27200 [00:00<00:00, 20515.42 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (1/2 shards):  79%|▊| 21600/27200 [00:01<00:00, 21426.57 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (1/2 shards):  94%|▉| 25600/27200 [00:01<00:00, 22078.81 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (2/2 shards): 100%|█| 27200/27200 [00:01<00:00, 22078.81 exam\r",
-      "Saving the dataset (2/2 shards): 100%|█| 27200/27200 [00:01<00:00, 20603.99 exam\r\n",
-      "\r",
-      "Saving the dataset (0/1 shards):   0%|           | 0/109 [00:00<?, ? examples/s]\r",
-      "Saving the dataset (1/1 shards): 100%|█| 109/109 [00:00<00:00, 8117.24 examples/\r",
-      "Saving the dataset (1/1 shards): 100%|█| 109/109 [00:00<00:00, 7809.82 examples/\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Lets preload the requried datasets\n",
-    "!cd \"{TRAINER_DIR}\" && \\\n",
-    "    python3 preload_datapath.py \"{NOTEBOOK_DIR}/enwiki-4k-part3.yaml\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "77d1d3e8",
-   "metadata": {
-    "papermill": {
-     "duration": 0.016656,
-     "end_time": "2023-10-11T08:03:29.342825",
-     "exception": false,
-     "start_time": "2023-10-11T08:03:29.326169",
-     "status": "completed"
-    },
-    "tags": []
-   },
-   "source": [
-    "## Enwiki Stage 3 : Split-Baseline-A training"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "42cb403e",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-10-11T08:03:29.379159Z",
-     "iopub.status.busy": "2023-10-11T08:03:29.378428Z",
-     "iopub.status.idle": "2023-10-11T08:03:46.935627Z",
-     "shell.execute_reply": "2023-10-11T08:03:46.934802Z"
-    },
-    "papermill": {
-     "duration": 17.577903,
-     "end_time": "2023-10-11T08:03:46.937715",
-     "exception": false,
-     "start_time": "2023-10-11T08:03:29.359812",
-     "status": "completed"
-    },
-    "tags": []
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[2023-10-11 08:03:33,838] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/multi-size-train/enwiki-4k-part3.yaml', '--trainer.logger.init_args.name=[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion A3 (train-ctx=4k, deepspeed_stage_2_offload)', '--trainer.strategy=deepspeed_stage_2_offload', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-L6+6-D2048-E0_01-layer-expansion-a3/', '--model.load_model=../model/v5-L6+6-D2048-E0_01-split-2a.pth', '--model.ctx_len=4096', '--model.bptt_learning_range=1'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/multi-size-train/enwiki-4k-part3.yaml', '--trainer.logger.init_args.name=[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion A3 (train-ctx=4k, deepspeed_stage_2_offload)', '--trainer.strategy=deepspeed_stage_2_offload', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-L6+6-D2048-E0_01-layer-expansion-a3/', '--model.load_model=../model/v5-L6+6-D2048-E0_01-split-2a.pth', '--model.ctx_len=4096', '--model.bptt_learning_range=1'].\r\n",
-      "  rank_zero_warn(\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 1933922385\r\n",
-      "  rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n",
-      "Global seed set to 1933922385\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.12\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20231011_080337-5696uouo\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion A3 (train-ctx=4k, deepspeed_stage_2_offload)\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/runs/5696uouo\u001b[0m\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Traceback (most recent call last):\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 278, in <module>\r\n",
-      "    cli_main()\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 253, in cli_main\r\n",
-      "    LightningCLI(\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 350, in __init__\r\n",
-      "    self.instantiate_classes()\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 499, in instantiate_classes\r\n",
-      "    self.config_init = self.parser.instantiate_classes(self.config)\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n",
-      "    cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1130, in instantiate_classes\r\n",
-      "    cfg[subcommand] = subparser.instantiate_classes(cfg[subcommand], instantiate_groups=instantiate_groups)\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n",
-      "    cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1124, in instantiate_classes\r\n",
-      "    component.instantiate_class(component, cfg)\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_signatures.py\", line 561, in group_instantiate_class\r\n",
-      "    parent[key] = group.group_class(**value)\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 566, in __init__\r\n",
-      "    raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n",
-      "ValueError: load_model file '../model/v5-L6+6-D2048-E0_01-split-2a.pth' does not exist\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33m[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion A3 (train-ctx=4k, deepspeed_stage_2_offload)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/runs/5696uouo\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjk0OTk4MDcy/version_details/v16\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20231011_080337-5696uouo/logs\u001b[0m\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Start the foundation model training\n",
-    "!cd \"{TRAINER_DIR}\" && \\\n",
-    "    export WANDB_MODE=\"{WANDB_MODE}\" && \\\n",
-    "    python3 lightning_trainer.py fit \\\n",
-    "        -c \"{NOTEBOOK_DIR}/enwiki-4k-part3.yaml\" \\\n",
-    "        --trainer.logger.init_args.name=\"{WANDB_PREFIX} - layer-expansion A3 (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n",
-    "        --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n",
-    "        --trainer.devices=\"{GPU_DEVICES}\" \\\n",
-    "        --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-layer-expansion-a3/\" \\\n",
-    "        --model.load_model=\"../model/{FILENAME_PREFIX}-split-2a.pth\" \\\n",
-    "        --model.ctx_len=4096 \\\n",
-    "        --model.bptt_learning_range=1"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "id": "53867c42",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-10-11T08:03:46.969471Z",
-     "iopub.status.busy": "2023-10-11T08:03:46.969019Z",
-     "iopub.status.idle": "2023-10-11T08:03:50.682437Z",
-     "shell.execute_reply": "2023-10-11T08:03:50.680986Z"
-    },
-    "papermill": {
-     "duration": 3.732808,
-     "end_time": "2023-10-11T08:03:50.685581",
-     "exception": false,
-     "start_time": "2023-10-11T08:03:46.952773",
-     "status": "completed"
-    },
-    "tags": []
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[2023-10-11 08:03:49,278] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Traceback (most recent call last):\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in <module>\r\n",
-      "    convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n",
-      "    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n",
-      "    raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n",
-      "ValueError: Unable to find 'latest' file at ../checkpoint/v5-L6+6-D2048-E0_01-layer-expansion-a3/last.ckpt/latest\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "ls: cannot access '../model/v5-L6+6-D2048-E0_01-layer-expansion-a3.pth': No such file or directory\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Lets export the model from the checkpoint\n",
-    "!cd \"{TRAINER_DIR}\" && \\\n",
-    "    python3 export_checkpoint.py \"../checkpoint/{FILENAME_PREFIX}-layer-expansion-a3/last.ckpt\" \"../model/{FILENAME_PREFIX}-layer-expansion-a3.pth\" \"bf16\"\n",
-    "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-layer-expansion-a3.pth\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "id": "5688e577",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-10-11T08:03:50.806267Z",
-     "iopub.status.busy": "2023-10-11T08:03:50.804997Z",
-     "iopub.status.idle": "2023-10-11T08:03:56.788036Z",
-     "shell.execute_reply": "2023-10-11T08:03:56.786568Z"
-    },
-    "papermill": {
-     "duration": 6.08675,
-     "end_time": "2023-10-11T08:03:56.790510",
-     "exception": false,
-     "start_time": "2023-10-11T08:03:50.703760",
-     "status": "completed"
-    },
-    "tags": []
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[2023-10-11 08:03:54,934] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Traceback (most recent call last):\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/dragon_test.py\", line 52, in <module>\r\n",
-      "    model = SimpleRWKV(MODEL_PATH, device=DEVICE)\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 1420, in __init__\r\n",
-      "    self.model = RWKV(**model_config)\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 566, in __init__\r\n",
-      "    raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n",
-      "ValueError: load_model file '../model/v5-L6+6-D2048-E0_01-layer-expansion-a3.pth' does not exist\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "# # Lets do a quick dragon prompt validation\n",
-    "!cd \"{INFERENCE_DIR}\" && \\\n",
-    "    python3 dragon_test.py \"../model/{FILENAME_PREFIX}-layer-expansion-a3.pth\" \"cuda fp32\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "b4927e87",
-   "metadata": {
-    "papermill": {
-     "duration": 0.015295,
-     "end_time": "2023-10-11T08:03:56.820640",
-     "exception": false,
-     "start_time": "2023-10-11T08:03:56.805345",
-     "status": "completed"
-    },
-    "tags": []
-   },
-   "source": [
-    "## Enwiki Stage 3 : Split-Baseline-B training"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "id": "6bdd285a",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-10-11T08:03:56.853495Z",
-     "iopub.status.busy": "2023-10-11T08:03:56.852946Z",
-     "iopub.status.idle": "2023-10-11T08:04:11.500794Z",
-     "shell.execute_reply": "2023-10-11T08:04:11.499336Z"
-    },
-    "papermill": {
-     "duration": 14.668001,
-     "end_time": "2023-10-11T08:04:11.503644",
-     "exception": false,
-     "start_time": "2023-10-11T08:03:56.835643",
-     "status": "completed"
-    },
-    "tags": []
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[2023-10-11 08:04:01,096] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/multi-size-train/enwiki-4k-part3.yaml', '--trainer.logger.init_args.name=[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion B3 (train-ctx=4k, deepspeed_stage_2_offload)', '--trainer.strategy=deepspeed_stage_2_offload', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-L6+6-D2048-E0_01-layer-expansion-b3/', '--model.load_model=../model/v5-L6+6-D2048-E0_01-split-2b.pth', '--model.ctx_len=4096', '--model.bptt_learning_range=1'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/multi-size-train/enwiki-4k-part3.yaml', '--trainer.logger.init_args.name=[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion B3 (train-ctx=4k, deepspeed_stage_2_offload)', '--trainer.strategy=deepspeed_stage_2_offload', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-L6+6-D2048-E0_01-layer-expansion-b3/', '--model.load_model=../model/v5-L6+6-D2048-E0_01-split-2b.pth', '--model.ctx_len=4096', '--model.bptt_learning_range=1'].\r\n",
-      "  rank_zero_warn(\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 1732922148\r\n",
-      "  rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n",
-      "Global seed set to 1732922148\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.12\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20231011_080403-88lcuk7j\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion B3 (train-ctx=4k, deepspeed_stage_2_offload)\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/runs/88lcuk7j\u001b[0m\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Traceback (most recent call last):\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 278, in <module>\r\n",
-      "    cli_main()\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 253, in cli_main\r\n",
-      "    LightningCLI(\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 350, in __init__\r\n",
-      "    self.instantiate_classes()\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 499, in instantiate_classes\r\n",
-      "    self.config_init = self.parser.instantiate_classes(self.config)\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n",
-      "    cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1130, in instantiate_classes\r\n",
-      "    cfg[subcommand] = subparser.instantiate_classes(cfg[subcommand], instantiate_groups=instantiate_groups)\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n",
-      "    cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1124, in instantiate_classes\r\n",
-      "    component.instantiate_class(component, cfg)\r\n",
-      "  File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_signatures.py\", line 561, in group_instantiate_class\r\n",
-      "    parent[key] = group.group_class(**value)\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 566, in __init__\r\n",
-      "    raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n",
-      "ValueError: load_model file '../model/v5-L6+6-D2048-E0_01-split-2b.pth' does not exist\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33m[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion B3 (train-ctx=4k, deepspeed_stage_2_offload)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/runs/88lcuk7j\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjk0OTk4MDcy/version_details/v16\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20231011_080403-88lcuk7j/logs\u001b[0m\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Start the foundation model training\n",
-    "!cd \"{TRAINER_DIR}\" && \\\n",
-    "    export WANDB_MODE=\"{WANDB_MODE}\" && \\\n",
-    "    python3 lightning_trainer.py fit \\\n",
-    "        -c \"{NOTEBOOK_DIR}/enwiki-4k-part3.yaml\" \\\n",
-    "        --trainer.logger.init_args.name=\"{WANDB_PREFIX} - layer-expansion B3 (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n",
-    "        --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n",
-    "        --trainer.devices=\"{GPU_DEVICES}\" \\\n",
-    "        --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-layer-expansion-b3/\" \\\n",
-    "        --model.load_model=\"../model/{FILENAME_PREFIX}-split-2b.pth\" \\\n",
-    "        --model.ctx_len=4096 \\\n",
-    "        --model.bptt_learning_range=1"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "id": "ae4623a1",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-10-11T08:04:11.546046Z",
-     "iopub.status.busy": "2023-10-11T08:04:11.544870Z",
-     "iopub.status.idle": "2023-10-11T08:04:15.274349Z",
-     "shell.execute_reply": "2023-10-11T08:04:15.272957Z"
-    },
-    "papermill": {
-     "duration": 3.754115,
-     "end_time": "2023-10-11T08:04:15.277163",
-     "exception": false,
-     "start_time": "2023-10-11T08:04:11.523048",
-     "status": "completed"
-    },
-    "tags": []
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[2023-10-11 08:04:13,869] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Traceback (most recent call last):\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in <module>\r\n",
-      "    convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n",
-      "    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n",
-      "    raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n",
-      "ValueError: Unable to find 'latest' file at ../checkpoint/v5-L6+6-D2048-E0_01-layer-expansion-b3/last.ckpt/latest\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "ls: cannot access '../model/v5-L6+6-D2048-E0_01-layer-expansion-b3.pth': No such file or directory\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Lets export the model from the checkpoint\n",
-    "!cd \"{TRAINER_DIR}\" && \\\n",
-    "    python3 export_checkpoint.py \"../checkpoint/{FILENAME_PREFIX}-layer-expansion-b3/last.ckpt\" \"../model/{FILENAME_PREFIX}-layer-expansion-b3.pth\" \"bf16\"\n",
-    "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-layer-expansion-b3.pth\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "id": "8e1b1152",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-10-11T08:04:15.319747Z",
-     "iopub.status.busy": "2023-10-11T08:04:15.318636Z",
-     "iopub.status.idle": "2023-10-11T08:04:21.268526Z",
-     "shell.execute_reply": "2023-10-11T08:04:21.267073Z"
-    },
-    "papermill": {
-     "duration": 5.974644,
-     "end_time": "2023-10-11T08:04:21.271495",
-     "exception": false,
-     "start_time": "2023-10-11T08:04:15.296851",
-     "status": "completed"
-    },
-    "tags": []
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[2023-10-11 08:04:19,430] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
-      "Traceback (most recent call last):\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/dragon_test.py\", line 52, in <module>\r\n",
-      "    model = SimpleRWKV(MODEL_PATH, device=DEVICE)\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 1420, in __init__\r\n",
-      "    self.model = RWKV(**model_config)\r\n",
-      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 566, in __init__\r\n",
-      "    raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n",
-      "ValueError: load_model file '../model/v5-L6+6-D2048-E0_01-layer-expansion-b3.pth' does not exist\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "# # Lets do a quick dragon prompt validation\n",
-    "!cd \"{INFERENCE_DIR}\" && \\\n",
-    "    python3 dragon_test.py \"../model/{FILENAME_PREFIX}-layer-expansion-b3.pth\" \"cuda fp32\""
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.12"
-  },
-  "papermill": {
-   "default_parameters": {},
-   "duration": 119.315066,
-   "end_time": "2023-10-11T08:04:21.714050",
-   "environment_variables": {},
-   "exception": null,
-   "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb",
-   "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb",
-   "parameters": {},
-   "start_time": "2023-10-11T08:02:22.398984",
-   "version": "2.4.0"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
\ No newline at end of file
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-expansion.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-expansion.ipynb
deleted file mode 100644
index 31a5b8eeab80f4fb0b5a736155d2fd141fa7fd54..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-expansion.ipynb
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b0aa2c37ab25e53ed3e45a9e7b5b09d1ac2d2f627412df5c98cc1f113838d800
-size 15734950
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-overwrite-naive.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-overwrite-naive.ipynb
deleted file mode 100644
index 9810fb95056168b6f333635a6ad59587d31b6e23..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-overwrite-naive.ipynb
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d643e2a64a0f7323eb7b14b90ce5a0e5457818349c75e666dbf52b7319f5de72
-size 15733849
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-baseline-p2.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-baseline-p2.pth
deleted file mode 100644
index b42c1d46426286791c4b684a05f90055dccae4d1..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-baseline-p2.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:849b57b4d493d40313ef04b30ffc22ec6f5cb99e05225615ee0cb00acb78a95d
-size 1066537077
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-baseline-p3.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-baseline-p3.pth
deleted file mode 100644
index e671afa6d1c25ea33703bbbdf389a33493910501..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-baseline-p3.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8413565273ef40f61db246dcbf793e045b39d1163e18885441be5a16d733f34c
-size 1066537077
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-enwiki-4k-p1.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-enwiki-4k-p1.pth
deleted file mode 100644
index b5857b83e411d72861863eda5c9c32a7132e1bfe..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-enwiki-4k-p1.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:235d88b0aa939596392f2b5734a426940535816aa13106498974a809051a4c75
-size 1066537217
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-layer-expansion-a3.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-layer-expansion-a3.pth
deleted file mode 100644
index e145614e20e99af77e84454e6ef16a39a61c1d9f..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-layer-expansion-a3.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1afd8d92632792f498805ac222d159524badf4ecbcaaae597060b6bb87a53110
-size 1066538057
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-layer-expansion-b3.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-layer-expansion-b3.pth
deleted file mode 100644
index 381a48603dc68a10750a4b7d78e79594e6bde52d..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-layer-expansion-b3.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9e61d8f8901d1eb50759f0242e2886678ed24b9931295a270b14120ba74cb5c3
-size 1066538057
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-neox-v5base-init.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-neox-v5base-init.pth
deleted file mode 100644
index ded0f392eb463040cbb0e4a66326c5ae08bcbda6..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-neox-v5base-init.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c2d60ede71bc384ee4eff0a591b3fa57dd670c27e5e8ce5eadf25a7f0d7e226d
-size 1066538337
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2a.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2a.pth
deleted file mode 100644
index c498833cf2e305eacbd6ebd9485e9a5d6706eca2..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2a.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2f52085cee9c3db4bb079dc44edf50b0a19c170bd92128e918e6203efef83cea
-size 1066536657
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2b.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2b.pth
deleted file mode 100644
index b1bfb4e806da5dde645c9feb2acb0b0140ce43c6..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2b.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6b64a1018631b9ddd15a746002bab3eafe956dced78a91af7abcdadaae4a7b25
-size 1066536657
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-merge-2m.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-merge-2m.pth
deleted file mode 100644
index bfe873e0bdd09173577c50c9f6f3634155ade0ce..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-merge-2m.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f10f8f00c42b6408db81a3b26d53411c41edc7f23f5097ac095ad3096d6c5dc1
-size 1066537497
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-merge-p3.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-merge-p3.pth
deleted file mode 100644
index f2aa96bd9b7f4e604e397947323f5156ee2fa129..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-merge-p3.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6f07a8414cd0cd1c3df705dff8a0f2142231171ee52a94d12c55dfe7c888fef7
-size 1066537497
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-baseline.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-baseline.ipynb
deleted file mode 100644
index e04ec817954792ce45a871de0ebed229db957ffd..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-baseline.ipynb
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0fccffc430231ad06fdb02a7e50ea57acfbeae3c42a97b018f62f937d30736e4
-size 16519239
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb
deleted file mode 100644
index 955412d6f333912148d0dc1023c32ce58509ccd2..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7260b3fe80de461d6dc923b21af87361f71e26a4a7191d51dd9665403728ddfa
-size 15732960
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-baseline.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-baseline.ipynb
deleted file mode 100644
index 5c78f23bf3d5e33fcb6836c803015836a2da0149..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-baseline.ipynb
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f95adf89d498a4dd58af22ba192b2fd4d08ceec250784c7e9f6f9b8de0fed2bc
-size 15855123
diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-train.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-train.ipynb
deleted file mode 100644
index 50ba5f3c8e80bcfb1a8005406d9e4f78979d8dac..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-train.ipynb
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c308e5ae9f8fde5fd24cafccf60917dca9c97fc2e0a5fbcfa01027d6d50e927d
-size 16623766
diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb
index 7e206cc12ec5ccf27fa871d361750dce04655d13..3519049547cd237105e880346b2b9338b4ca8ce6 100644
--- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb
+++ b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb
@@ -1,3 +1,4642 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0439ce7fb6866af36cb53bbddaf6a1ed49656c85a84d6a2aabd6754b30fa2109
-size 61159745
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "e6bf5eac",
+   "metadata": {
+    "papermill": {
+     "duration": 0.005682,
+     "end_time": "2023-09-14T02:37:04.293470",
+     "exception": false,
+     "start_time": "2023-09-14T02:37:04.287788",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "# RWKV v5\n",
+    "\n",
+    "Simple memory training for a small model\n",
+    "\n",
+    "**Note:** This project assumes you have the rwkv-infctx conda env set up"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "f59fa274",
+   "metadata": {
+    "papermill": {
+     "duration": 0.003026,
+     "end_time": "2023-09-14T02:37:04.300149",
+     "exception": false,
+     "start_time": "2023-09-14T02:37:04.297123",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "# Basic Setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "b9505f51",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-09-14T02:37:04.308476Z",
+     "iopub.status.busy": "2023-09-14T02:37:04.307936Z",
+     "iopub.status.idle": "2023-09-14T02:37:05.309488Z",
+     "shell.execute_reply": "2023-09-14T02:37:05.308127Z"
+    },
+    "papermill": {
+     "duration": 1.008439,
+     "end_time": "2023-09-14T02:37:05.311918",
+     "exception": false,
+     "start_time": "2023-09-14T02:37:04.303479",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CITATION.cff  RWKV-v4wavenet\t RWKV-v5headsize2x  checkpoint\tnotebook\r\n",
+      "LICENSE       RWKV-v5\t\t RWKV-v5headsize32  datapath\toutput\r\n",
+      "README.md     RWKV-v5-beta2\t RWKV-v5rstack\t    docker\r\n",
+      "RWKV-v4neo    RWKV-v5altwavenet  RWKV-v5wavenet     model\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# First, let's set up the various directories and init the model\n",
+    "!ls ../../../../../\n",
+    "!mkdir -p ../../../../../model/\n",
+    "!mkdir -p ../../../../../datapath/\n",
+    "!mkdir -p ../../../../../checkpoint/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "8d16737a",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-09-14T02:37:05.320892Z",
+     "iopub.status.busy": "2023-09-14T02:37:05.319751Z",
+     "iopub.status.idle": "2023-09-14T02:37:08.625564Z",
+     "shell.execute_reply": "2023-09-14T02:37:08.624420Z"
+    },
+    "papermill": {
+     "duration": 3.312981,
+     "end_time": "2023-09-14T02:37:08.627991",
+     "exception": false,
+     "start_time": "2023-09-14T02:37:05.315010",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\r\n",
+      "\u001b[0m"
+     ]
+    }
+   ],
+   "source": [
+    "# Additional dependencies for eval stuff\n",
+    "!pip3 install -q aiocsv aiofiles"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "157915c9",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-09-14T02:37:08.636451Z",
+     "iopub.status.busy": "2023-09-14T02:37:08.635808Z",
+     "iopub.status.idle": "2023-09-14T02:37:08.646285Z",
+     "shell.execute_reply": "2023-09-14T02:37:08.644957Z"
+    },
+    "papermill": {
+     "duration": 0.017165,
+     "end_time": "2023-09-14T02:37:08.648279",
+     "exception": false,
+     "start_time": "2023-09-14T02:37:08.631114",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "DEEPSPEED_STRAT: deepspeed_stage_1\n",
+      "ENABLE_WANDB: True\n",
+      "GPU_DEVICES: auto\n",
+      "DIR_NAME: L6-D2048-E1e-1-ctx4k\n",
+      "NOTEBOOK_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k\n",
+      "INFERENCE_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n",
+      "TRAINER_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n",
+      "PROJECT_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer\n"
+     ]
+    }
+   ],
+   "source": [
+    "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n",
+    "GPU_DEVICES=\"auto\"\n",
+    "ENABLE_WANDB=True\n",
+    "\n",
+    "# Layer count and embed dim to start with\n",
+    "LAYER_COUNT=6\n",
+    "EMBED_DIM=2048\n",
+    "\n",
+    "EMBED_SCALE=0.1\n",
+    "EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(\".\", \"_\")\n",
+    "\n",
+    "WANDB_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE}\"\n",
+    "FILENAME_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE_LABEL}\"\n",
+    "\n",
+    "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n",
+    "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n",
+    "print(\"GPU_DEVICES:\", GPU_DEVICES)\n",
+    "\n",
+    "if ENABLE_WANDB:\n",
+    "    WANDB_MODE=\"online\"\n",
+    "else:\n",
+    "    WANDB_MODE=\"disabled\"\n",
+    "\n",
+    "# Compute the notebook directory and the various derived paths\n",
+    "import os\n",
+    "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n",
+    "CONFIG_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../\"))\n",
+    "PROJECT_DIR=os.path.abspath(os.path.join(CONFIG_DIR, \"../../../../\"))\n",
+    "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n",
+    "INFERENCE_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n",
+    "\n",
+    "# Get the notebook dir name\n",
+    "DIR_NAME=os.path.basename(NOTEBOOK_DIR)\n",
+    "\n",
+    "# Log names and dir\n",
+    "print(\"DIR_NAME:\", DIR_NAME)\n",
+    "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n",
+    "print(\"INFERENCE_DIR:\", INFERENCE_DIR)\n",
+    "print(\"TRAINER_DIR:\", TRAINER_DIR)\n",
+    "print(\"PROJECT_DIR:\", PROJECT_DIR)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "ed6bf7ff",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-09-14T02:37:08.656415Z",
+     "iopub.status.busy": "2023-09-14T02:37:08.655852Z",
+     "iopub.status.idle": "2023-09-14T02:37:32.430048Z",
+     "shell.execute_reply": "2023-09-14T02:37:32.428850Z"
+    },
+    "papermill": {
+     "duration": 23.781188,
+     "end_time": "2023-09-14T02:37:32.432519",
+     "exception": false,
+     "start_time": "2023-09-14T02:37:08.651331",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "--2023-09-14 02:37:08--  https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-512.pth\r\n",
+      "Resolving huggingface.co (huggingface.co)... 18.154.227.67, 18.154.227.69, 18.154.227.87, ...\r\n",
+      "Connecting to huggingface.co (huggingface.co)|18.154.227.67|:443... connected.\r\n",
+      "HTTP request sent, awaiting response... 302 Found\r\n",
+      "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/92bee66e66bfcba8c592c785b63cb88f4e4889d78d7cdc49c33bd53bf0e3c31f?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L6-D2048-E0_1-mem-ctx-512.pth%3B+filename%3D%22v5r3-L6-D2048-E0_1-mem-ctx-512.pth%22%3B&Expires=1694918228&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDkxODIyOH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzkyYmVlNjZlNjZiZmNiYThjNTkyYzc4NWI2M2NiODhmNGU0ODg5ZDc4ZDdjZGM0OWMzM2JkNTNiZjBlM2MzMWY%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=cJAoUY5y0W5uDSWebon3c0434JSN%7EgNHU8QPvHA1bl1fW7kXK0ETDur-X-85BKCXlTHdhzjGRWoxRQGb33uDdG35IvOOksMyaVFYfnyj0JA66Bh9q%7E35mFanEks9Ja7QfTFOyrfWlndyFOT0M5Hzx-rJQ-nLDBne1LfEZEwxt7Uv2jsFCYkukWDP1f-OwfqwTb1q4Ys7knlGyj1ZQ4sq45v6cFcJAXU8R8GUhEd5j8vg9bnxtYKZvYqJuZcX8T1w%7EQJ5DJK0l9lYIY0JIiqZr4tCNkjD6PbTvnVA7E8TQys0Hjgf0o291i9ruANc6bwjWcGOpPeBo4QI24aWO9Fxlg__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n",
+      "--2023-09-14 02:37:08--  https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/92bee66e66bfcba8c592c785b63cb88f4e4889d78d7cdc49c33bd53bf0e3c31f?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L6-D2048-E0_1-mem-ctx-512.pth%3B+filename%3D%22v5r3-L6-D2048-E0_1-mem-ctx-512.pth%22%3B&Expires=1694918228&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDkxODIyOH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzkyYmVlNjZlNjZiZmNiYThjNTkyYzc4NWI2M2NiODhmNGU0ODg5ZDc4ZDdjZGM0OWMzM2JkNTNiZjBlM2MzMWY%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=cJAoUY5y0W5uDSWebon3c0434JSN%7EgNHU8QPvHA1bl1fW7kXK0ETDur-X-85BKCXlTHdhzjGRWoxRQGb33uDdG35IvOOksMyaVFYfnyj0JA66Bh9q%7E35mFanEks9Ja7QfTFOyrfWlndyFOT0M5Hzx-rJQ-nLDBne1LfEZEwxt7Uv2jsFCYkukWDP1f-OwfqwTb1q4Ys7knlGyj1ZQ4sq45v6cFcJAXU8R8GUhEd5j8vg9bnxtYKZvYqJuZcX8T1w%7EQJ5DJK0l9lYIY0JIiqZr4tCNkjD6PbTvnVA7E8TQys0Hjgf0o291i9ruANc6bwjWcGOpPeBo4QI24aWO9Fxlg__&Key-Pair-Id=KVTP0A1DKRTAX\r\n",
+      "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "108.138.64.49, 108.138.64.121, 108.138.64.111, ...\r\n",
+      "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|108.138.64.49|:443... connected.\r\n",
+      "HTTP request sent, awaiting response... "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "200 OK\r\n",
+      "Length: 1066537217 (1017M) [binary/octet-stream]\r\n",
+      "Saving to: ‘v5r3-L6-D2048-E0_1-mem-ctx-512.pth’\r\n",
+      "\r\n",
+      "\r",
+      "          v5r3-L6-D   0%[                    ]       0  --.-KB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "         v5r3-L6-D2   1%[                    ]  14.74M  63.3MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "        v5r3-L6-D20   2%[                    ]  24.19M  47.2MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "       v5r3-L6-D204   3%[                    ]  30.52M  38.2MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "      v5r3-L6-D2048   4%[                    ]  45.26M  42.8MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "     v5r3-L6-D2048-   5%[>                   ]  54.89M  43.6MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "    v5r3-L6-D2048-E   6%[>                   ]  61.03M  41.1MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "   v5r3-L6-D2048-E0   7%[>                   ]  75.78M  43.5MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "  v5r3-L6-D2048-E0_   8%[>                   ]  87.34M  44.9MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      " v5r3-L6-D2048-E0_1   9%[>                   ]  91.55M  41.4MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "v5r3-L6-D2048-E0_1-  10%[=>                  ] 106.29M  43.3MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "5r3-L6-D2048-E0_1-m  11%[=>                  ] 114.75M  43.2MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "r3-L6-D2048-E0_1-me  11%[=>                  ] 121.56M  42.5MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "3-L6-D2048-E0_1-mem  12%[=>                  ] 122.07M  39.8MB/s    eta 22s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-L6-D2048-E0_1-mem-  13%[=>                  ] 137.33M  42.0MB/s    eta 22s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "L6-D2048-E0_1-mem-c  14%[=>                  ] 152.07M  43.6MB/s    eta 22s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "6-D2048-E0_1-mem-ct  15%[==>                 ] 152.59M  41.2MB/s    eta 22s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-D2048-E0_1-mem-ctx  16%[==>                 ] 167.33M  41.1MB/s    eta 21s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "D2048-E0_1-mem-ctx-  16%[==>                 ] 167.85M  37.9MB/s    eta 21s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "2048-E0_1-mem-ctx-5  17%[==>                 ] 181.27M  39.7MB/s    eta 21s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "048-E0_1-mem-ctx-51  18%[==>                 ] 183.10M  37.2MB/s    eta 21s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "48-E0_1-mem-ctx-512  19%[==>                 ] 198.36M  38.0MB/s    eta 21s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "8-E0_1-mem-ctx-512.  20%[===>                ] 213.11M  40.5MB/s    eta 21s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-E0_1-mem-ctx-512.p  22%[===>                ] 228.36M  41.5MB/s    eta 21s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "E0_1-mem-ctx-512.pt  23%[===>                ] 243.09M  42.1MB/s    eta 21s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0_1-mem-ctx-512.pth  24%[===>                ] 254.13M  44.3MB/s    eta 21s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "_1-mem-ctx-512.pth   25%[====>               ] 259.40M  41.5MB/s    eta 18s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "1-mem-ctx-512.pth    27%[====>               ] 274.66M  42.2MB/s    eta 18s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-mem-ctx-512.pth     28%[====>               ] 289.40M  44.8MB/s    eta 18s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "mem-ctx-512.pth      29%[====>               ] 298.47M  43.8MB/s    eta 18s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "em-ctx-512.pth       30%[=====>              ] 305.18M  42.3MB/s    eta 17s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "m-ctx-512.pth        30%[=====>              ] 313.62M  39.8MB/s    eta 17s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-ctx-512.pth         32%[=====>              ] 333.86M  42.5MB/s    eta 17s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "ctx-512.pth          33%[=====>              ] 341.22M  44.0MB/s    eta 17s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "tx-512.pth           34%[=====>              ] 350.95M  45.4MB/s    eta 16s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "x-512.pth            35%[======>             ] 365.70M  44.4MB/s    eta 16s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-512.pth             36%[======>             ] 371.37M  45.7MB/s    eta 16s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "512.pth              37%[======>             ] 381.47M  44.0MB/s    eta 16s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "12.pth               39%[======>             ] 396.73M  45.9MB/s    eta 15s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "2.pth                41%[=======>            ] 419.01M  47.8MB/s    eta 15s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      ".pth                 42%[=======>            ] 427.25M  47.9MB/s    eta 15s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "pth                  43%[=======>            ] 442.51M  47.9MB/s    eta 15s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "th                   45%[========>           ] 457.76M  46.8MB/s    eta 15s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "h                    46%[========>           ] 473.02M  47.8MB/s    eta 12s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                     48%[========>           ] 488.28M  47.6MB/s    eta 12s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                  v  49%[========>           ] 507.63M  50.4MB/s    eta 12s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                 v5  50%[=========>          ] 512.48M  51.1MB/s    eta 12s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                v5r  52%[=========>          ] 532.75M  54.5MB/s    eta 12s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "               v5r3  52%[=========>          ] 534.05M  51.4MB/s    eta 11s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "              v5r3-  53%[=========>          ] 547.49M  52.6MB/s    eta 11s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "             v5r3-L  54%[=========>          ] 557.13M  52.7MB/s    eta 11s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "            v5r3-L6  56%[==========>         ] 569.63M  54.0MB/s    eta 11s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "           v5r3-L6-  56%[==========>         ] 579.31M  55.5MB/s    eta 11s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "          v5r3-L6-D  58%[==========>         ] 594.57M  54.4MB/s    eta 9s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "         v5r3-L6-D2  59%[==========>         ] 600.21M  54.9MB/s    eta 9s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "        v5r3-L6-D20  59%[==========>         ] 602.76M  51.4MB/s    eta 9s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "       v5r3-L6-D204  60%[===========>        ] 610.35M  48.1MB/s    eta 9s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "      v5r3-L6-D2048  61%[===========>        ] 625.09M  48.0MB/s    eta 9s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "     v5r3-L6-D2048-  62%[===========>        ] 640.36M  48.3MB/s    eta 8s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "    v5r3-L6-D2048-E  64%[===========>        ] 655.62M  47.5MB/s    eta 8s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "   v5r3-L6-D2048-E0  64%[===========>        ] 656.25M  43.3MB/s    eta 8s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "  v5r3-L6-D2048-E0_  65%[============>       ] 669.55M  42.5MB/s    eta 8s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      " v5r3-L6-D2048-E0_1  66%[============>       ] 678.94M  43.2MB/s    eta 8s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "v5r3-L6-D2048-E0_1-  67%[============>       ] 686.64M  42.0MB/s    eta 7s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "5r3-L6-D2048-E0_1-m  69%[============>       ] 701.90M  42.1MB/s    eta 7s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "r3-L6-D2048-E0_1-me  70%[=============>      ] 716.64M  42.5MB/s    eta 7s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "3-L6-D2048-E0_1-mem  70%[=============>      ] 717.16M  41.4MB/s    eta 7s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-L6-D2048-E0_1-mem-  71%[=============>      ] 724.85M  39.7MB/s    eta 7s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "L6-D2048-E0_1-mem-c  72%[=============>      ] 732.42M  38.1MB/s    eta 7s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "6-D2048-E0_1-mem-ct  73%[=============>      ] 747.69M  38.4MB/s    eta 7s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-D2048-E0_1-mem-ctx  75%[==============>     ] 762.94M  40.7MB/s    eta 7s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "D2048-E0_1-mem-ctx-  76%[==============>     ] 777.68M  43.3MB/s    eta 7s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "2048-E0_1-mem-ctx-5  77%[==============>     ] 792.94M  42.8MB/s    eta 5s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "048-E0_1-mem-ctx-51  78%[==============>     ] 793.46M  39.6MB/s    eta 5s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "48-E0_1-mem-ctx-512  79%[==============>     ] 808.20M  41.4MB/s    eta 5s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "8-E0_1-mem-ctx-512.  79%[==============>     ] 812.25M  39.4MB/s    eta 5s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-E0_1-mem-ctx-512.p  81%[===============>    ] 823.97M  40.9MB/s    eta 5s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "E0_1-mem-ctx-512.pt  82%[===============>    ] 838.71M  42.4MB/s    eta 4s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0_1-mem-ctx-512.pth  82%[===============>    ] 839.35M  41.6MB/s    eta 4s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "_1-mem-ctx-512.pth   85%[================>   ] 866.03M  44.5MB/s    eta 4s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "1-mem-ctx-512.pth    85%[================>   ] 869.75M  42.1MB/s    eta 4s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-mem-ctx-512.pth     87%[================>   ] 885.01M  42.8MB/s    eta 4s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "mem-ctx-512.pth      87%[================>   ] 893.44M  44.4MB/s    eta 3s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "em-ctx-512.pth       88%[================>   ] 900.27M  43.8MB/s    eta 3s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "m-ctx-512.pth        89%[================>   ] 911.78M  42.6MB/s    eta 3s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-ctx-512.pth         90%[=================>  ] 915.53M  41.5MB/s    eta 3s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "ctx-512.pth          91%[=================>  ] 930.78M  40.9MB/s    eta 2s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "tx-512.pth           92%[=================>  ] 944.21M  41.4MB/s    eta 2s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "x-512.pth            94%[=================>  ] 956.92M  42.2MB/s    eta 2s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-512.pth             94%[=================>  ] 961.30M  42.7MB/s    eta 2s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "512.pth              96%[==================> ] 980.99M  44.8MB/s    eta 1s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "12.pth               97%[==================> ] 986.93M  42.2MB/s    eta 1s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "2.pth                98%[==================> ]   1004M  44.5MB/s    eta 1s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      ".pth                 99%[==================> ]   1007M  42.6MB/s    eta 1s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "v5r3-L6-D2048-E0_1- 100%[===================>]   1017M  45.2MB/s    in 23s     \r\n",
+      "\r\n",
+      "2023-09-14 02:37:32 (44.1 MB/s) - ‘v5r3-L6-D2048-E0_1-mem-ctx-512.pth’ saved [1066537217/1066537217]\r\n",
+      "\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "total 1018M\r\n",
+      "drwxr-xr-x  2 root root  4.0K Sep 14 02:37 .\r\n",
+      "drwxr-xr-x 20 root root  4.0K Sep 14 02:37 ..\r\n",
+      "-rw-r--r--  1 root root 1018M Sep 13 20:28 v5r3-L6-D2048-E0_1-mem-ctx-512.pth\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Download the model directly (stopgap until the HF sync issues are resolved)\n",
+    "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n",
+    "    wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/{DIR_NAME}/{FILENAME_PREFIX}-mem-ctx-512.pth\"\n",
+    "\n",
+    "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n",
+    "    ls -alh ."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5700b4b5",
+   "metadata": {
+    "papermill": {
+     "duration": 0.008064,
+     "end_time": "2023-09-14T02:37:32.449084",
+     "exception": false,
+     "start_time": "2023-09-14T02:37:32.441020",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "## Tune 3 : Ramping up the ctx size (8192), memory training\n",
+    "\n",
+    "- Tune 3: Large ctx size (8192), Scaling up!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "d5f911bc",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-09-14T02:37:32.468291Z",
+     "iopub.status.busy": "2023-09-14T02:37:32.467681Z",
+     "iopub.status.idle": "2023-09-14T02:37:52.531727Z",
+     "shell.execute_reply": "2023-09-14T02:37:52.530635Z"
+    },
+    "papermill": {
+     "duration": 20.125122,
+     "end_time": "2023-09-14T02:37:52.582572",
+     "exception": false,
+     "start_time": "2023-09-14T02:37:32.457450",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "## Generating word reptition dataset ##\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 40 max words, 100 samples - at ../dataset/gen-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 30 max words, 100 samples - at ../dataset/gen-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 35 max words, 100 samples - at ../dataset/gen-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 45 max words, 100 samples - at ../dataset/gen-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 20 max words, 100 samples - at ../dataset/gen-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5 max words, 100 samples - at ../dataset/gen-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 50 max words, 100 samples - at ../dataset/gen-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 10 max words, 100 samples - at ../dataset/gen-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 15 max words, 100 samples - at ../dataset/gen-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 374 samples (10 token repeat) - 70 max words - at ../dataset/shuffle-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 869 samples (10 token repeat) - 30 max words - at ../dataset/shuffle-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 90 max words, 100 samples - at ../dataset/gen-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 528 samples (10 token repeat) - 50 max words - at ../dataset/shuffle-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 586 samples (10 token repeat) - 45 max words - at ../dataset/shuffle-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 1060 samples (10 token repeat) - 25 max words - at ../dataset/shuffle-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 747 samples (10 token repeat) - 35 max words - at ../dataset/shuffle-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 25 max words, 100 samples - at ../dataset/gen-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 1301 samples (10 token repeat) - 20 max words - at ../dataset/shuffle-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 100 samples (20 token repeat) - 600 max words - at ../dataset/shuffle-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 60 max words, 100 samples - at ../dataset/gen-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 650 samples (10 token repeat) - 40 max words - at ../dataset/shuffle-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 70 max words, 100 samples - at ../dataset/gen-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4700 max words - at ../dataset/shuffle-word-4700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 81 samples (20 token repeat) - 700 max words - at ../dataset/shuffle-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 65 max words, 100 samples - at ../dataset/gen-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 1794 samples (10 token repeat) - 15 max words - at ../dataset/shuffle-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 529 samples (20 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 85 max words, 100 samples - at ../dataset/gen-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 95 max words, 100 samples - at ../dataset/gen-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 328 samples (10 token repeat) - 80 max words - at ../dataset/shuffle-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 352 samples (10 token repeat) - 75 max words - at ../dataset/shuffle-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 55 max words, 100 samples - at ../dataset/gen-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 293 samples (10 token repeat) - 90 max words - at ../dataset/shuffle-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4400 max words - at ../dataset/shuffle-word-4400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4100 max words - at ../dataset/shuffle-word-4100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 26 samples (20 token repeat) - 2600 max words - at ../dataset/shuffle-word-2600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3700 max words - at ../dataset/shuffle-word-3700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2800 max words - at ../dataset/shuffle-word-2800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 313 samples (10 token repeat) - 85 max words - at ../dataset/shuffle-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 264 samples (10 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5600 max words - at ../dataset/shuffle-word-5600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 184 samples (20 token repeat) - 300 max words - at ../dataset/shuffle-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 80 max words, 100 samples - at ../dataset/gen-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6500 max words - at ../dataset/shuffle-word-6500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 39 samples (20 token repeat) - 2400 max words - at ../dataset/shuffle-word-2400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 439 samples (10 token repeat) - 60 max words - at ../dataset/shuffle-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 100 max words, 100 samples - at ../dataset/gen-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1400 max words - at ../dataset/shuffle-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7300 max words - at ../dataset/shuffle-word-7300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 75 max words, 100 samples - at ../dataset/gen-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 401 samples (10 token repeat) - 65 max words - at ../dataset/shuffle-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3900 max words - at ../dataset/shuffle-word-3900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6200 max words - at ../dataset/shuffle-word-6200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3000 max words - at ../dataset/shuffle-word-3000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3200 max words - at ../dataset/shuffle-word-3200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4300 max words - at ../dataset/shuffle-word-4300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4200 max words - at ../dataset/shuffle-word-4200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2200 max words - at ../dataset/shuffle-word-2200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 280 samples (10 token repeat) - 95 max words - at ../dataset/shuffle-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7700 max words - at ../dataset/shuffle-word-7700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 2607 samples (10 token repeat) - 10 max words - at ../dataset/shuffle-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3300 max words - at ../dataset/shuffle-word-3300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7000 max words - at ../dataset/shuffle-word-7000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5700 max words - at ../dataset/shuffle-word-5700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1500 max words - at ../dataset/shuffle-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5500 max words - at ../dataset/shuffle-word-5500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1600 max words - at ../dataset/shuffle-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 21 samples (20 token repeat) - 2700 max words - at ../dataset/shuffle-word-2700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5000 max words - at ../dataset/shuffle-word-5000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2900 max words - at ../dataset/shuffle-word-2900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3600 max words - at ../dataset/shuffle-word-3600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 482 samples (10 token repeat) - 55 max words - at ../dataset/shuffle-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 37 samples (20 token repeat) - 2500 max words - at ../dataset/shuffle-word-2500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5200 max words - at ../dataset/shuffle-word-5200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2000 max words - at ../dataset/shuffle-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 267 samples (20 token repeat) - 200 max words - at ../dataset/shuffle-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5100 max words - at ../dataset/shuffle-word-5100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2100 max words - at ../dataset/shuffle-word-2100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1000 max words - at ../dataset/shuffle-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 8000 max words - at ../dataset/shuffle-word-8000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5300 max words - at ../dataset/shuffle-word-5300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1100 max words - at ../dataset/shuffle-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6300 max words - at ../dataset/shuffle-word-6300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4900 max words - at ../dataset/shuffle-word-4900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3500 max words - at ../dataset/shuffle-word-3500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7100 max words - at ../dataset/shuffle-word-7100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5800 max words - at ../dataset/shuffle-word-5800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6700 max words - at ../dataset/shuffle-word-6700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1800 max words - at ../dataset/shuffle-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7800 max words - at ../dataset/shuffle-word-7800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4500 max words - at ../dataset/shuffle-word-4500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 117 samples (20 token repeat) - 500 max words - at ../dataset/shuffle-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4600 max words - at ../dataset/shuffle-word-4600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7500 max words - at ../dataset/shuffle-word-7500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7600 max words - at ../dataset/shuffle-word-7600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7900 max words - at ../dataset/shuffle-word-7900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6100 max words - at ../dataset/shuffle-word-6100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1200 max words - at ../dataset/shuffle-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3800 max words - at ../dataset/shuffle-word-3800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7400 max words - at ../dataset/shuffle-word-7400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6400 max words - at ../dataset/shuffle-word-6400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3400 max words - at ../dataset/shuffle-word-3400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 48 samples (20 token repeat) - 1300 max words - at ../dataset/shuffle-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5900 max words - at ../dataset/shuffle-word-5900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5563 samples (10 token repeat) - 5 max words - at ../dataset/shuffle-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6900 max words - at ../dataset/shuffle-word-6900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4800 max words - at ../dataset/shuffle-word-4800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4000 max words - at ../dataset/shuffle-word-4000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2300 max words - at ../dataset/shuffle-word-2300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6000 max words - at ../dataset/shuffle-word-6000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6600 max words - at ../dataset/shuffle-word-6600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7200 max words - at ../dataset/shuffle-word-7200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 140 samples (20 token repeat) - 400 max words - at ../dataset/shuffle-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 63 samples (20 token repeat) - 900 max words - at ../dataset/shuffle-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1900 max words - at ../dataset/shuffle-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5400 max words - at ../dataset/shuffle-word-5400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6800 max words - at ../dataset/shuffle-word-6800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1700 max words - at ../dataset/shuffle-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3100 max words - at ../dataset/shuffle-word-3100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 800 max words - at ../dataset/shuffle-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 100 max words, 2000 samples - at ../dataset/gen-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 200 max words, 2000 samples - at ../dataset/gen-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 300 max words, 2000 samples - at ../dataset/gen-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 700 max words, 2000 samples - at ../dataset/gen-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 400 max words, 2000 samples - at ../dataset/gen-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 500 max words, 2000 samples - at ../dataset/gen-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1300 max words, 2000 samples - at ../dataset/gen-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 600 max words, 2000 samples - at ../dataset/gen-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1700 max words, 2000 samples - at ../dataset/gen-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1000 max words, 2000 samples - at ../dataset/gen-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1600 max words, 2000 samples - at ../dataset/gen-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 800 max words, 2000 samples - at ../dataset/gen-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 900 max words, 2000 samples - at ../dataset/gen-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1100 max words, 2000 samples - at ../dataset/gen-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1200 max words, 2000 samples - at ../dataset/gen-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2900 max words, 2000 samples - at ../dataset/gen-word-2900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1400 max words, 2000 samples - at ../dataset/gen-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1800 max words, 2000 samples - at ../dataset/gen-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1500 max words, 2000 samples - at ../dataset/gen-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3000 max words, 2000 samples - at ../dataset/gen-word-3000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2000 max words, 2000 samples - at ../dataset/gen-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3500 max words, 2000 samples - at ../dataset/gen-word-3500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2100 max words, 2000 samples - at ../dataset/gen-word-2100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4200 max words, 2000 samples - at ../dataset/gen-word-4200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2300 max words, 2000 samples - at ../dataset/gen-word-2300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2600 max words, 2000 samples - at ../dataset/gen-word-2600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1900 max words, 2000 samples - at ../dataset/gen-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4600 max words, 2000 samples - at ../dataset/gen-word-4600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2200 max words, 2000 samples - at ../dataset/gen-word-2200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2500 max words, 2000 samples - at ../dataset/gen-word-2500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2400 max words, 2000 samples - at ../dataset/gen-word-2400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3900 max words, 2000 samples - at ../dataset/gen-word-3900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4700 max words, 2000 samples - at ../dataset/gen-word-4700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4800 max words, 2000 samples - at ../dataset/gen-word-4800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2700 max words, 2000 samples - at ../dataset/gen-word-2700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2800 max words, 2000 samples - at ../dataset/gen-word-2800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6800 max words, 2000 samples - at ../dataset/gen-word-6800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3200 max words, 2000 samples - at ../dataset/gen-word-3200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6600 max words, 2000 samples - at ../dataset/gen-word-6600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3100 max words, 2000 samples - at ../dataset/gen-word-3100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3400 max words, 2000 samples - at ../dataset/gen-word-3400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3300 max words, 2000 samples - at ../dataset/gen-word-3300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3700 max words, 2000 samples - at ../dataset/gen-word-3700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5000 max words, 2000 samples - at ../dataset/gen-word-5000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4000 max words, 2000 samples - at ../dataset/gen-word-4000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5100 max words, 2000 samples - at ../dataset/gen-word-5100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3600 max words, 2000 samples - at ../dataset/gen-word-3600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7200 max words, 2000 samples - at ../dataset/gen-word-7200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4500 max words, 2000 samples - at ../dataset/gen-word-4500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4400 max words, 2000 samples - at ../dataset/gen-word-4400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4100 max words, 2000 samples - at ../dataset/gen-word-4100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3800 max words, 2000 samples - at ../dataset/gen-word-3800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6100 max words, 2000 samples - at ../dataset/gen-word-6100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6700 max words, 2000 samples - at ../dataset/gen-word-6700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4300 max words, 2000 samples - at ../dataset/gen-word-4300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5600 max words, 2000 samples - at ../dataset/gen-word-5600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7500 max words, 2000 samples - at ../dataset/gen-word-7500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5300 max words, 2000 samples - at ../dataset/gen-word-5300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7100 max words, 2000 samples - at ../dataset/gen-word-7100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6200 max words, 2000 samples - at ../dataset/gen-word-6200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5200 max words, 2000 samples - at ../dataset/gen-word-5200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5700 max words, 2000 samples - at ../dataset/gen-word-5700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5500 max words, 2000 samples - at ../dataset/gen-word-5500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4900 max words, 2000 samples - at ../dataset/gen-word-4900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5400 max words, 2000 samples - at ../dataset/gen-word-5400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6000 max words, 2000 samples - at ../dataset/gen-word-6000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7400 max words, 2000 samples - at ../dataset/gen-word-7400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5900 max words, 2000 samples - at ../dataset/gen-word-5900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6500 max words, 2000 samples - at ../dataset/gen-word-6500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6900 max words, 2000 samples - at ../dataset/gen-word-6900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 8000 max words, 2000 samples - at ../dataset/gen-word-8000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7000 max words, 2000 samples - at ../dataset/gen-word-7000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6400 max words, 2000 samples - at ../dataset/gen-word-6400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5800 max words, 2000 samples - at ../dataset/gen-word-5800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6300 max words, 2000 samples - at ../dataset/gen-word-6300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7300 max words, 2000 samples - at ../dataset/gen-word-7300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7600 max words, 2000 samples - at ../dataset/gen-word-7600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7700 max words, 2000 samples - at ../dataset/gen-word-7700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7800 max words, 2000 samples - at ../dataset/gen-word-7800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7900 max words, 2000 samples - at ../dataset/gen-word-7900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "## Done ##\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "total 6.1G\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  21K Sep 14 02:37 gen-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 2.1M Sep 14 02:37 gen-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  20M Sep 14 02:37 gen-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  22M Sep 14 02:37 gen-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  23M Sep 14 02:37 gen-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  25M Sep 14 02:37 gen-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27M Sep 14 02:37 gen-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  24K Sep 14 02:37 gen-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  29M Sep 14 02:37 gen-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  31M Sep 14 02:37 gen-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  33M Sep 14 02:37 gen-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  35M Sep 14 02:37 gen-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  37M Sep 14 02:37 gen-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  30K Sep 14 02:37 gen-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 4.0M Sep 14 02:37 gen-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  39M Sep 14 02:37 gen-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  41M Sep 14 02:37 gen-word-2100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  42M Sep 14 02:37 gen-word-2200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  44M Sep 14 02:37 gen-word-2300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  46M Sep 14 02:37 gen-word-2400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  35K Sep 14 02:37 gen-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  48M Sep 14 02:37 gen-word-2500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  50M Sep 14 02:37 gen-word-2600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  52M Sep 14 02:37 gen-word-2700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  54M Sep 14 02:37 gen-word-2800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  56M Sep 14 02:37 gen-word-2900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  39K Sep 14 02:37 gen-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 5.9M Sep 14 02:37 gen-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  58M Sep 14 02:37 gen-word-3000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  60M Sep 14 02:37 gen-word-3100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  61M Sep 14 02:37 gen-word-3200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  63M Sep 14 02:37 gen-word-3300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  65M Sep 14 02:37 gen-word-3400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  45K Sep 14 02:37 gen-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  67M Sep 14 02:37 gen-word-3500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  69M Sep 14 02:37 gen-word-3600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  71M Sep 14 02:37 gen-word-3700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  73M Sep 14 02:37 gen-word-3800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  75M Sep 14 02:37 gen-word-3900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  49K Sep 14 02:37 gen-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 7.8M Sep 14 02:37 gen-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  77M Sep 14 02:37 gen-word-4000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  79M Sep 14 02:37 gen-word-4100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  80M Sep 14 02:37 gen-word-4200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  82M Sep 14 02:37 gen-word-4300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  84M Sep 14 02:37 gen-word-4400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  54K Sep 14 02:37 gen-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  86M Sep 14 02:37 gen-word-4500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  88M Sep 14 02:37 gen-word-4600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  90M Sep 14 02:37 gen-word-4700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  92M Sep 14 02:37 gen-word-4800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  94M Sep 14 02:37 gen-word-4900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  15K Sep 14 02:37 gen-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  57K Sep 14 02:37 gen-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 9.7M Sep 14 02:37 gen-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  96M Sep 14 02:37 gen-word-5000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  97M Sep 14 02:37 gen-word-5100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  99M Sep 14 02:37 gen-word-5200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 101M Sep 14 02:37 gen-word-5300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 103M Sep 14 02:37 gen-word-5400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  62K Sep 14 02:37 gen-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 105M Sep 14 02:37 gen-word-5500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 107M Sep 14 02:37 gen-word-5600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 109M Sep 14 02:37 gen-word-5700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 111M Sep 14 02:37 gen-word-5800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 113M Sep 14 02:37 gen-word-5900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  68K Sep 14 02:37 gen-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  12M Sep 14 02:37 gen-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 115M Sep 14 02:37 gen-word-6000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 117M Sep 14 02:37 gen-word-6100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 118M Sep 14 02:37 gen-word-6200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 120M Sep 14 02:37 gen-word-6300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 122M Sep 14 02:37 gen-word-6400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  73K Sep 14 02:37 gen-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 124M Sep 14 02:37 gen-word-6500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 126M Sep 14 02:37 gen-word-6600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 128M Sep 14 02:37 gen-word-6700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 130M Sep 14 02:37 gen-word-6800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 132M Sep 14 02:37 gen-word-6900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  79K Sep 14 02:37 gen-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  14M Sep 14 02:37 gen-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 134M Sep 14 02:37 gen-word-7000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 136M Sep 14 02:37 gen-word-7100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 137M Sep 14 02:37 gen-word-7200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 139M Sep 14 02:37 gen-word-7300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 141M Sep 14 02:37 gen-word-7400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  83K Sep 14 02:37 gen-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 143M Sep 14 02:37 gen-word-7500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 145M Sep 14 02:37 gen-word-7600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 147M Sep 14 02:37 gen-word-7700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 149M Sep 14 02:37 gen-word-7800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 151M Sep 14 02:37 gen-word-7900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  86K Sep 14 02:37 gen-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  16M Sep 14 02:37 gen-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 153M Sep 14 02:37 gen-word-8000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  88K Sep 14 02:37 gen-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 101K Sep 14 02:37 gen-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  18M Sep 14 02:37 gen-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 101K Sep 14 02:37 gen-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 500K Sep 14 02:37 shuffle-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 281K Sep 14 02:37 shuffle-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 525K Sep 14 02:37 shuffle-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 520K Sep 14 02:37 shuffle-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 522K Sep 14 02:37 shuffle-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 524K Sep 14 02:37 shuffle-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 520K Sep 14 02:37 shuffle-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 432K Sep 14 02:37 shuffle-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 522K Sep 14 02:37 shuffle-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 521K Sep 14 02:37 shuffle-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 519K Sep 14 02:37 shuffle-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 520K Sep 14 02:37 shuffle-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 519K Sep 14 02:37 shuffle-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 393K Sep 14 02:37 shuffle-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 540K Sep 14 02:37 shuffle-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 520K Sep 14 02:37 shuffle-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 519K Sep 14 02:37 shuffle-word-2100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 520K Sep 14 02:37 shuffle-word-2200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 521K Sep 14 02:37 shuffle-word-2300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 517K Sep 14 02:37 shuffle-word-2400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 357K Sep 14 02:37 shuffle-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 519K Sep 14 02:37 shuffle-word-2500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 512K Sep 14 02:37 shuffle-word-2600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 510K Sep 14 02:37 shuffle-word-2700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-2800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 02:37 shuffle-word-2900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 334K Sep 14 02:37 shuffle-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 534K Sep 14 02:37 shuffle-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 02:37 shuffle-word-3000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-3100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-3200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-3300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-3400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 322K Sep 14 02:37 shuffle-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 02:37 shuffle-word-3500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-3600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-3700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-3800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-3900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 314K Sep 14 02:37 shuffle-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 527K Sep 14 02:37 shuffle-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 02:37 shuffle-word-4300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 317K Sep 14 02:37 shuffle-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-4900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 822K Sep 14 02:37 shuffle-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 311K Sep 14 02:37 shuffle-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 526K Sep 14 02:37 shuffle-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 301K Sep 14 02:37 shuffle-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-5900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 300K Sep 14 02:37 shuffle-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 527K Sep 14 02:37 shuffle-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-6000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-6100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 02:37 shuffle-word-6200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-6300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-6400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 297K Sep 14 02:37 shuffle-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 02:37 shuffle-word-6500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-6600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-6700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-6800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-6900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 297K Sep 14 02:37 shuffle-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 530K Sep 14 02:37 shuffle-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 284K Sep 14 02:37 shuffle-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-7800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 02:37 shuffle-word-7900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 291K Sep 14 02:37 shuffle-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 526K Sep 14 02:37 shuffle-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 02:37 shuffle-word-8000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 283K Sep 14 02:37 shuffle-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 281K Sep 14 02:37 shuffle-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 524K Sep 14 02:37 shuffle-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 286K Sep 14 02:37 shuffle-word-95-count.jsonl\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%script bash\n",
+    "\n",
+    "########################################\n",
+    "# Generate the required jsonl dataset\n",
+    "#\n",
+    "# Every generator below is started as a background job and joined by the\n",
+    "# single `wait` near the end, so each output file must be produced by\n",
+    "# exactly one job. The two loop ranges must therefore never overlap.\n",
+    "########################################\n",
+    "\n",
+    "# Go to config dir\n",
+    "cd \"../\"\n",
+    "\n",
+    "# Reset the dataset dir\n",
+    "mkdir -p ../dataset\n",
+    "rm -rf ../dataset/*.jsonl\n",
+    "\n",
+    "# Generate the various datasets\n",
+    "echo \"## Generating word reptition dataset ##\"\n",
+    "\n",
+    "#\n",
+    "# We reduce the training set for < 50 words - and shift the focus upwards\n",
+    "# (aka 50-100 token * 2 : ~100 - 250 token ctx len)\n",
+    "#\n",
+    "# NOTE: this range stops at 95 on purpose. The 100-word files are generated\n",
+    "# by the second loop; when both loops included i=100, two concurrent jobs\n",
+    "# wrote gen-word-100-count.jsonl (and shuffle-word-100-count.jsonl) at the\n",
+    "# same time, which can interleave their output and leave invalid JSON lines.\n",
+    "#\n",
+    "for i in {5..95..5}\n",
+    "do\n",
+    "    python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 100 &\n",
+    "    python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 10 &\n",
+    "done\n",
+    "\n",
+    "#\n",
+    "# Ramping up the 100 - 8000 words dataset (step of 100)\n",
+    "#\n",
+    "for i in {100..8000..100}\n",
+    "do\n",
+    "    python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 2000 &\n",
+    "    python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 20 &\n",
+    "done\n",
+    "\n",
+    "# Block until every background generator has exited before listing the files\n",
+    "wait\n",
+    "echo \"## Done ##\"\n",
+    "\n",
+    "ls -lh ../dataset/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "af9b83d3",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-09-14T02:37:52.652229Z",
+     "iopub.status.busy": "2023-09-14T02:37:52.651608Z",
+     "iopub.status.idle": "2023-09-14T02:38:21.558685Z",
+     "shell.execute_reply": "2023-09-14T02:38:21.557835Z"
+    },
+    "papermill": {
+     "duration": 28.945501,
+     "end_time": "2023-09-14T02:38:21.561935",
+     "exception": false,
+     "start_time": "2023-09-14T02:37:52.616434",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2023-09-14 02:37:56,909] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L6-D2048-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L6-D2048-E0_1-mem-ctx-8k/', '--model.lr_init=4e-4', '--model.lr_final=2e-4', '--data.max_token_size=8192', '--data.sort_by_length=True', '--model.ctx_len=4096', '--model.bptt_learning_range=2', '--model.load_model=../model/v5r3-L6-D2048-E0_1-mem-ctx-512.pth'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L6-D2048-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L6-D2048-E0_1-mem-ctx-8k/', '--model.lr_init=4e-4', '--model.lr_final=2e-4', '--data.max_token_size=8192', '--data.sort_by_length=True', '--model.ctx_len=4096', '--model.bptt_learning_range=2', '--model.load_model=../model/v5r3-L6-D2048-E0_1-mem-ctx-512.pth'].\r\n",
+      "  rank_zero_warn(\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 1547623296\r\n",
+      "  rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n",
+      "Global seed set to 1547623296\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.10\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20230914_023759-9o2jwwvs\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mv5r3-L6-D2048-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/9o2jwwvs\u001b[0m\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.10/dist-packages/lightning/fabric/connector.py:554: UserWarning: bf16 is supported for historical reasons but its usage is discouraged. Please set your precision to bf16-mixed instead!\r\n",
+      "  rank_zero_warn(\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "GPU available: True (cuda), used: True\r\n",
+      "TPU available: False, using: 0 TPU cores\r\n",
+      "IPU available: False, using: 0 IPUs\r\n",
+      "HPU available: False, using: 0 HPUs\r\n",
+      "\r\n",
+      "\r\n",
+      "[RWKV.Trainer] Applying 'target_batch_size' with the following:\r\n",
+      "   - target_batch_size:       256\r\n",
+      "   - num_nodes:               1\r\n",
+      "   - num_devices:             1\r\n",
+      "   - accumulate_grad_batches: 256\r\n",
+      "   - effective_batch_size:    256\r\n",
+      "\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Resolving data files:   0%|                             | 0/198 [00:00<?, ?it/s]\r",
+      "Resolving data files: 100%|███████████████| 198/198 [00:00<00:00, 176959.77it/s]\r\n",
+      "\r",
+      "Downloading data files:   0%|                             | 0/1 [00:00<?, ?it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Downloading data files: 100%|████████████████████| 1/1 [00:00<00:00, 279.12it/s]\r\n",
+      "\r",
+      "Extracting data files:   0%|                              | 0/1 [00:00<?, ?it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Extracting data files: 100%|██████████████████████| 1/1 [00:00<00:00, 16.15it/s]\r\n",
+      "\r",
+      "Generating train split: 0 examples [00:00, ? examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Failed to read file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/dataset/gen-word-100-count.jsonl' with error <class 'pyarrow.lib.ArrowInvalid'>: JSON parse error: Missing a comma or '}' after an object member. in row 27\r\n",
+      "\r",
+      "Generating train split: 100 examples [00:01, 58.74 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 4119 examples [00:01, 3148.90 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 9991 examples [00:01, 8377.41 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 16136 examples [00:02, 14631.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 21292 examples [00:02, 19123.98 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 42124 examples [00:02, 48567.56 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 50633 examples [00:02, 48135.15 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 59623 examples [00:02, 55584.20 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 67337 examples [00:02, 53321.94 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 74360 examples [00:02, 50060.00 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 80571 examples [00:03, 46642.17 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 85925 examples [00:03, 46695.96 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 91087 examples [00:03, 42287.60 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 95851 examples [00:03, 41277.70 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 100328 examples [00:03, 41903.98 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 105109 examples [00:03, 43330.01 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 109873 examples [00:03, 38887.49 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 114106 examples [00:03, 38800.38 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 118536 examples [00:04, 40082.20 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 122694 examples [00:04, 38268.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 127281 examples [00:04, 38358.03 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 131204 examples [00:04, 37584.81 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 135020 examples [00:04, 32997.15 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 139737 examples [00:04, 35623.56 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 143828 examples [00:04, 36646.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 147603 examples [00:04, 34959.40 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 151330 examples [00:05, 28703.38 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 154519 examples [00:05, 26946.66 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 157445 examples [00:05, 25877.83 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 160186 examples [00:05, 24367.50 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 162774 examples [00:05, 23524.07 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 165326 examples [00:05, 21662.50 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 167801 examples [00:05, 18798.65 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 169818 examples [00:06, 17953.36 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 171755 examples [00:06, 11617.64 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 171974 examples [00:06, 26184.81 examples/s]\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "multiprocess.pool.RemoteTraceback: \r\n",
+      "\"\"\"\r\n",
+      "Traceback (most recent call last):\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/packaged_modules/json/json.py\", line 144, in _generate_tables\r\n",
+      "    dataset = json.load(f)\r\n",
+      "  File \"/usr/lib/python3.10/json/__init__.py\", line 293, in load\r\n",
+      "    return loads(fp.read(),\r\n",
+      "  File \"/usr/lib/python3.10/json/__init__.py\", line 346, in loads\r\n",
+      "    return _default_decoder.decode(s)\r\n",
+      "  File \"/usr/lib/python3.10/json/decoder.py\", line 340, in decode\r\n",
+      "    raise JSONDecodeError(\"Extra data\", s, end)\r\n",
+      "json.decoder.JSONDecodeError: Extra data: line 2 column 1 (char 1231)\r\n",
+      "\r\n",
+      "During handling of the above exception, another exception occurred:\r\n",
+      "\r\n",
+      "Traceback (most recent call last):\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 1925, in _prepare_split_single\r\n",
+      "    for _, table in generator:\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/packaged_modules/json/json.py\", line 147, in _generate_tables\r\n",
+      "    raise e\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/packaged_modules/json/json.py\", line 121, in _generate_tables\r\n",
+      "    pa_table = paj.read_json(\r\n",
+      "  File \"pyarrow/_json.pyx\", line 258, in pyarrow._json.read_json\r\n",
+      "  File \"pyarrow/error.pxi\", line 144, in pyarrow.lib.pyarrow_internal_check_status\r\n",
+      "  File \"pyarrow/error.pxi\", line 100, in pyarrow.lib.check_status\r\n",
+      "pyarrow.lib.ArrowInvalid: JSON parse error: Missing a comma or '}' after an object member. in row 27\r\n",
+      "\r\n",
+      "The above exception was the direct cause of the following exception:\r\n",
+      "\r\n",
+      "Traceback (most recent call last):\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/multiprocess/pool.py\", line 125, in worker\r\n",
+      "    result = (True, func(*args, **kwds))\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py\", line 1347, in _write_generator_to_queue\r\n",
+      "    for i, result in enumerate(func(**kwargs)):\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 1958, in _prepare_split_single\r\n",
+      "    raise DatasetGenerationError(\"An error occurred while generating the dataset\") from e\r\n",
+      "datasets.builder.DatasetGenerationError: An error occurred while generating the dataset\r\n",
+      "\"\"\"\r\n",
+      "\r\n",
+      "The above exception was the direct cause of the following exception:\r\n",
+      "\r\n",
+      "Traceback (most recent call last):\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 258, in <module>\r\n",
+      "    cli_main()\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 233, in cli_main\r\n",
+      "    LightningCLI(\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 353, in __init__\r\n",
+      "    self._run_subcommand(self.subcommand)\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 642, in _run_subcommand\r\n",
+      "    fn(**fn_kwargs)\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 529, in fit\r\n",
+      "    call._call_and_handle_interrupt(\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 41, in _call_and_handle_interrupt\r\n",
+      "    return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/strategies/launchers/subprocess_script.py\", line 91, in launch\r\n",
+      "    return function(*args, **kwargs)\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 568, in _fit_impl\r\n",
+      "    self._run(model, ckpt_path=ckpt_path)\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 925, in _run\r\n",
+      "    self._data_connector.prepare_data()\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py\", line 94, in prepare_data\r\n",
+      "    call._call_lightning_datamodule_hook(trainer, \"prepare_data\")\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 164, in _call_lightning_datamodule_hook\r\n",
+      "    return fn(*args, **kwargs)\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 549, in prepare_data\r\n",
+      "    prepare_data_static(**self._init_locals)\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 101, in prepare_data_static\r\n",
+      "    src_dataset = load_dataset(**load_dataset_params)\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 2136, in load_dataset\r\n",
+      "    builder_instance.download_and_prepare(\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 954, in download_and_prepare\r\n",
+      "    self._download_and_prepare(\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 1049, in _download_and_prepare\r\n",
+      "    self._prepare_split(split_generator, **prepare_split_kwargs)\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 1842, in _prepare_split\r\n",
+      "    for job_id, done, content in iflatmap_unordered(\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py\", line 1387, in iflatmap_unordered\r\n",
+      "    [async_result.get(timeout=0.05) for async_result in async_results]\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py\", line 1387, in <listcomp>\r\n",
+      "    [async_result.get(timeout=0.05) for async_result in async_results]\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/multiprocess/pool.py\", line 774, in get\r\n",
+      "    raise self._value\r\n",
+      "datasets.builder.DatasetGenerationError: An error occurred while generating the dataset\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: - 0.005 MB of 0.005 MB uploaded (0.000 MB deduped)\r"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: \\ 0.005 MB of 0.016 MB uploaded (0.000 MB deduped)\r"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: | 0.005 MB of 0.016 MB uploaded (0.000 MB deduped)\r"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5r3-L6-D2048-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/9o2jwwvs\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjkzMjg5ODA3/version_details/v54\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230914_023759-9o2jwwvs/logs\u001b[0m\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Start the finetune model training\n",
+    "!cd \"{TRAINER_DIR}\" && \\\n",
+    "    export WANDB_MODE=\"{WANDB_MODE}\" && \\\n",
+    "    python3 lightning_trainer.py fit \\\n",
+    "        -c \"{CONFIG_DIR}/config-mem-template.yaml\" \\\n",
+    "        --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Tune ctx-8k (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n",
+    "        --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n",
+    "        --trainer.devices=\"{GPU_DEVICES}\"  \\\n",
+    "        --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-mem-ctx-8k/\" \\\n",
+    "        --model.lr_init=4e-4 \\\n",
+    "        --model.lr_final=2e-4 \\\n",
+    "        --data.max_token_size=8192 \\\n",
+    "        --data.sort_by_length=True \\\n",
+    "        --model.ctx_len=4096 \\\n",
+    "        --model.bptt_learning_range=2 \\\n",
+    "        --model.load_model=\"../model/{FILENAME_PREFIX}-mem-ctx-512.pth\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "6db19b87",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-09-14T02:38:21.728358Z",
+     "iopub.status.busy": "2023-09-14T02:38:21.727768Z",
+     "iopub.status.idle": "2023-09-14T02:38:25.677206Z",
+     "shell.execute_reply": "2023-09-14T02:38:25.676074Z"
+    },
+    "papermill": {
+     "duration": 3.986198,
+     "end_time": "2023-09-14T02:38:25.679730",
+     "exception": false,
+     "start_time": "2023-09-14T02:38:21.693532",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2023-09-14 02:38:24,136] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Traceback (most recent call last):\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in <module>\r\n",
+      "    convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n",
+      "    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n",
+      "    raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n",
+      "ValueError: Unable to find 'latest' file at ../checkpoint/v5r3-L6-D2048-E0_1-mem-ctx-8k/last.ckpt/latest\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ls: cannot access '../model/v5r3-L6-D2048-E0_1-mem-ctx-8k.pth': No such file or directory\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Let's export the model from the checkpoint\n",
+    "!cd \"{TRAINER_DIR}\" && \\\n",
+    "    python3 export_checkpoint.py \\\n",
+    "        \"../checkpoint/{FILENAME_PREFIX}-mem-ctx-8k/last.ckpt\" \\\n",
+    "        \"../model/{FILENAME_PREFIX}-mem-ctx-8k.pth\" \"bf16\"\n",
+    "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-mem-ctx-8k.pth\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "ec391cb3",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-09-14T02:38:25.748202Z",
+     "iopub.status.busy": "2023-09-14T02:38:25.747247Z",
+     "iopub.status.idle": "2023-09-14T02:38:26.013713Z",
+     "shell.execute_reply": "2023-09-14T02:38:26.012573Z"
+    },
+    "papermill": {
+     "duration": 0.303358,
+     "end_time": "2023-09-14T02:38:26.016073",
+     "exception": false,
+     "start_time": "2023-09-14T02:38:25.712715",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/eval_v5_memory_guided.py': [Errno 2] No such file or directory\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Let's do a quick memory test\n",
+    "!python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-8k.pth\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "2748101d",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-09-14T02:38:26.085118Z",
+     "iopub.status.busy": "2023-09-14T02:38:26.084169Z",
+     "iopub.status.idle": "2023-09-14T02:38:26.352535Z",
+     "shell.execute_reply": "2023-09-14T02:38:26.351341Z"
+    },
+    "papermill": {
+     "duration": 0.305573,
+     "end_time": "2023-09-14T02:38:26.354898",
+     "exception": false,
+     "start_time": "2023-09-14T02:38:26.049325",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/eval_v5_memory_guided.py': [Errno 2] No such file or directory\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "!python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-8k.pth\" \"none\" 1000 4000"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "python3 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  },
+  "papermill": {
+   "default_parameters": {},
+   "duration": 83.790874,
+   "end_time": "2023-09-14T02:38:26.808961",
+   "environment_variables": {},
+   "exception": null,
+   "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb",
+   "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb",
+   "parameters": {},
+   "start_time": "2023-09-14T02:37:03.018087",
+   "version": "2.4.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
\ No newline at end of file
diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage4.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage4.ipynb
deleted file mode 100644
index 7ffd7172b586e0b8857d504b71f3808861d311aa..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage4.ipynb
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3855c5ed19072f2ecaec4294c3945a3290e692d15c0aa7351c7d4917404fbf65
-size 38208798
diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage5.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage5.ipynb
deleted file mode 100644
index 2b40078b2fa26fff7d07cf99fa6ea18a8e9a50b1..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage5.ipynb
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e318ecc4d20d89232f8a0677c54f8489d602cb3d66632cadb29c028c917eb00e
-size 30322339
diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-1k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-1k.pth
deleted file mode 100644
index 8af2cc985147220953dab23cbe6635297e1208ab..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-1k.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:cf3a15d56db013d138bed6780d58c4362ca96b3ef98fb98e2d1444f325c582b5
-size 1066537077
diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-2k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-2k.pth
deleted file mode 100644
index ec338ad0a412b427fe4cc7115959eb96face9f73..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-2k.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:76cc2d79013781f18c6507848a138150084d37c3aae0f20145e7e5854bcabb99
-size 1066537077
diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-4k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-4k.pth
deleted file mode 100644
index ae6d693b3aafaef43037882c09d0bebc48f35c22..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-4k.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2f1b8811c3f2f8c12c564edb973392bb486c9d674152f3eb769ac206c20bcfc0
-size 1066537077
diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-8k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-8k.pth
deleted file mode 100644
index 18bf2327f7771b99c67b0d997cbdf1ba8f34902d..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-8k.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:db7ab09a447f96d9fcdbee4761bd35f2c3bce9868d3136959ed601a8e478083c
-size 1066537077
diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb
index 67972444f9ed3d06fa1d595845dd1bbdebb54e5f..0039bc5a6fe58a8763dff759d499ede5e41c832a 100644
--- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb
+++ b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb
@@ -1,3 +1,7683 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:46167bb1edba84bd184533d3d424a8c4d9ed5a15d8b93474596453f235462f13
-size 58866814
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "158fdf2c",
+   "metadata": {
+    "papermill": {
+     "duration": 0.004282,
+     "end_time": "2023-09-14T00:21:48.785213",
+     "exception": false,
+     "start_time": "2023-09-14T00:21:48.780931",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "# RWKV v5\n",
+    "\n",
+    "Simple memory training for a small model\n",
+    "\n",
+    "**Note:** This project assumes you have the rwkv-infctx conda env set up"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "792a73e5",
+   "metadata": {
+    "papermill": {
+     "duration": 0.002651,
+     "end_time": "2023-09-14T00:21:48.790826",
+     "exception": false,
+     "start_time": "2023-09-14T00:21:48.788175",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "# Basic Setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "761b91e0",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-09-14T00:21:48.796049Z",
+     "iopub.status.busy": "2023-09-14T00:21:48.795747Z",
+     "iopub.status.idle": "2023-09-14T00:21:49.675797Z",
+     "shell.execute_reply": "2023-09-14T00:21:49.674913Z"
+    },
+    "papermill": {
+     "duration": 0.88445,
+     "end_time": "2023-09-14T00:21:49.677690",
+     "exception": false,
+     "start_time": "2023-09-14T00:21:48.793240",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CITATION.cff  RWKV-v4wavenet\t RWKV-v5headsize2x  checkpoint\tnotebook\r\n",
+      "LICENSE       RWKV-v5\t\t RWKV-v5headsize32  datapath\toutput\r\n",
+      "README.md     RWKV-v5-beta2\t RWKV-v5rstack\t    docker\r\n",
+      "RWKV-v4neo    RWKV-v5altwavenet  RWKV-v5wavenet     model\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# First, let's set up the various directories and init the model\n",
+    "!ls ../../../../../\n",
+    "!mkdir -p ../../../../../model/\n",
+    "!mkdir -p ../../../../../datapath/\n",
+    "!mkdir -p ../../../../../checkpoint/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "d8775637",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-09-14T00:21:49.684909Z",
+     "iopub.status.busy": "2023-09-14T00:21:49.684672Z",
+     "iopub.status.idle": "2023-09-14T00:21:51.806309Z",
+     "shell.execute_reply": "2023-09-14T00:21:51.805544Z"
+    },
+    "papermill": {
+     "duration": 2.127403,
+     "end_time": "2023-09-14T00:21:51.808325",
+     "exception": false,
+     "start_time": "2023-09-14T00:21:49.680922",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\r\n",
+      "\u001b[0m"
+     ]
+    }
+   ],
+   "source": [
+    "# Additional dependencies for eval stuff\n",
+    "!pip3 install -q aiocsv aiofiles"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "bda1d282",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-09-14T00:21:51.816177Z",
+     "iopub.status.busy": "2023-09-14T00:21:51.815928Z",
+     "iopub.status.idle": "2023-09-14T00:21:51.824598Z",
+     "shell.execute_reply": "2023-09-14T00:21:51.823913Z"
+    },
+    "papermill": {
+     "duration": 0.014641,
+     "end_time": "2023-09-14T00:21:51.826277",
+     "exception": false,
+     "start_time": "2023-09-14T00:21:51.811636",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "DEEPSPEED_STRAT: deepspeed_stage_1\n",
+      "ENABLE_WANDB: True\n",
+      "GPU_DEVICES: auto\n",
+      "DIR_NAME: L6-D2560-E1e-1-ctx4k\n",
+      "NOTEBOOK_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k\n",
+      "INFERENCE_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n",
+      "TRAINER_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n",
+      "PROJECT_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer\n"
+     ]
+    }
+   ],
+   "source": [
+    "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n",
+    "GPU_DEVICES=\"auto\"\n",
+    "ENABLE_WANDB=True\n",
+    "\n",
+    "# Layer count and embed dim to start with\n",
+    "LAYER_COUNT=6\n",
+    "EMBED_DIM=2560\n",
+    "\n",
+    "EMBED_SCALE=0.1\n",
+    "EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(\".\", \"_\")\n",
+    "\n",
+    "WANDB_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE}\"\n",
+    "FILENAME_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE_LABEL}\"\n",
+    "\n",
+    "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n",
+    "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n",
+    "print(\"GPU_DEVICES:\", GPU_DEVICES)\n",
+    "\n",
+    "if ENABLE_WANDB:\n",
+    "    WANDB_MODE=\"online\"\n",
+    "else:\n",
+    "    WANDB_MODE=\"disabled\"\n",
+    "\n",
+    "# Compute the notebook directory and the various derived paths\n",
+    "import os\n",
+    "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n",
+    "CONFIG_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../\"))\n",
+    "PROJECT_DIR=os.path.abspath(os.path.join(CONFIG_DIR, \"../../../../\"))\n",
+    "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n",
+    "INFERENCE_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n",
+    "\n",
+    "# Get the notebook dir name\n",
+    "DIR_NAME=os.path.basename(NOTEBOOK_DIR)\n",
+    "\n",
+    "# Log names and dir\n",
+    "print(\"DIR_NAME:\", DIR_NAME)\n",
+    "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n",
+    "print(\"INFERENCE_DIR:\", INFERENCE_DIR)\n",
+    "print(\"TRAINER_DIR:\", TRAINER_DIR)\n",
+    "print(\"PROJECT_DIR:\", PROJECT_DIR)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "951b741e",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-09-14T00:21:51.833577Z",
+     "iopub.status.busy": "2023-09-14T00:21:51.833353Z",
+     "iopub.status.idle": "2023-09-14T00:23:39.537522Z",
+     "shell.execute_reply": "2023-09-14T00:23:39.536676Z"
+    },
+    "papermill": {
+     "duration": 107.709983,
+     "end_time": "2023-09-14T00:23:39.539513",
+     "exception": false,
+     "start_time": "2023-09-14T00:21:51.829530",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "--2023-09-14 00:21:51--  https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-512.pth\r\n",
+      "Resolving huggingface.co (huggingface.co)... "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "13.33.33.55, 13.33.33.110, 13.33.33.102, ...\r\n",
+      "Connecting to huggingface.co (huggingface.co)|13.33.33.55|:443... connected.\r\n",
+      "HTTP request sent, awaiting response... "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "302 Found\r\n",
+      "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/facd3a8913710e7c17719547c55dcde02826ce2d592626c0339e42b394858498?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L6-D2560-E0_1-mem-ctx-512.pth%3B+filename%3D%22v5r3-L6-D2560-E0_1-mem-ctx-512.pth%22%3B&Expires=1694910112&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDkxMDExMn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkL2ZhY2QzYTg5MTM3MTBlN2MxNzcxOTU0N2M1NWRjZGUwMjgyNmNlMmQ1OTI2MjZjMDMzOWU0MmIzOTQ4NTg0OTg%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=WM4HZnIOKrH24paW4nOk1cHO9YHki8seMtQ6g3vGWI7sYyvPtz%7EXzbI4q%7EME0hvvhjAcVa1%7EUWwlWKF4I1ek7wHZOZ9ySyH0VaZ4HCTI0Zx9XlaT%7E62wMWO854tDrU5iHFVfMP59Rr%7EbQCkiwanrgwg5NC8iCw7uL5t2a-LvME3l0m65K5SzgC-0IEn4nVrXpnvdCmNaBNSNecwoP8yEYIv-0%7E-yeTK0j7dVnIifdmJY6pB4UiIPfOU--LckTIv8c%7EvvUtg4DWRMEspqC%7E%7EOquCGo3OAUgIZVvyhY9nzaWsJbQoRv3DyaWyuh8nKZW%7E8-99fEW8tfubjL3gqQBLqOw__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n",
+      "--2023-09-14 00:21:52--  https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/facd3a8913710e7c17719547c55dcde02826ce2d592626c0339e42b394858498?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L6-D2560-E0_1-mem-ctx-512.pth%3B+filename%3D%22v5r3-L6-D2560-E0_1-mem-ctx-512.pth%22%3B&Expires=1694910112&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDkxMDExMn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkL2ZhY2QzYTg5MTM3MTBlN2MxNzcxOTU0N2M1NWRjZGUwMjgyNmNlMmQ1OTI2MjZjMDMzOWU0MmIzOTQ4NTg0OTg%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=WM4HZnIOKrH24paW4nOk1cHO9YHki8seMtQ6g3vGWI7sYyvPtz%7EXzbI4q%7EME0hvvhjAcVa1%7EUWwlWKF4I1ek7wHZOZ9ySyH0VaZ4HCTI0Zx9XlaT%7E62wMWO854tDrU5iHFVfMP59Rr%7EbQCkiwanrgwg5NC8iCw7uL5t2a-LvME3l0m65K5SzgC-0IEn4nVrXpnvdCmNaBNSNecwoP8yEYIv-0%7E-yeTK0j7dVnIifdmJY6pB4UiIPfOU--LckTIv8c%7EvvUtg4DWRMEspqC%7E%7EOquCGo3OAUgIZVvyhY9nzaWsJbQoRv3DyaWyuh8nKZW%7E8-99fEW8tfubjL3gqQBLqOw__&Key-Pair-Id=KVTP0A1DKRTAX\r\n",
+      "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "18.155.68.128, 18.155.68.94, 18.155.68.73, ...\r\n",
+      "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|18.155.68.128|:443... connected.\r\n",
+      "HTTP request sent, awaiting response... "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "200 OK\r\n",
+      "Length: 1537632513 (1.4G) [binary/octet-stream]\r\n",
+      "Saving to: ‘v5r3-L6-D2560-E0_1-mem-ctx-512.pth’\r\n",
+      "\r\n",
+      "\r",
+      "          v5r3-L6-D   0%[                    ]       0  --.-KB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "         v5r3-L6-D2   0%[                    ]  18.27K  81.1KB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "        v5r3-L6-D25   0%[                    ]  58.27K   129KB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "       v5r3-L6-D256   0%[                    ] 135.27K   199KB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "      v5r3-L6-D2560   0%[                    ] 296.27K   327KB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "     v5r3-L6-D2560-   0%[                    ] 602.27K   532KB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "    v5r3-L6-D2560-E   0%[                    ]   1.20M   907KB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "   v5r3-L6-D2560-E0   0%[                    ]   2.42M  1.52MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "  v5r3-L6-D2560-E0_   0%[                    ]   4.85M  2.68MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      " v5r3-L6-D2560-E0_1   0%[                    ]   8.65M  4.24MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "v5r3-L6-D2560-E0_1-   0%[                    ]  12.56M  5.53MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "5r3-L6-D2560-E0_1-m   1%[                    ]  16.43M  6.58MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "r3-L6-D2560-E0_1-me   1%[                    ]  20.21M  7.41MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "3-L6-D2560-E0_1-mem   1%[                    ]  24.07M  8.14MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-L6-D2560-E0_1-mem-   1%[                    ]  27.84M  8.74MB/s    eta 2m 45s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "L6-D2560-E0_1-mem-c   2%[                    ]  31.63M  9.26MB/s    eta 2m 45s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "6-D2560-E0_1-mem-ct   2%[                    ]  35.51M  9.74MB/s    eta 2m 45s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-D2560-E0_1-mem-ctx   2%[                    ]  39.34M  10.2MB/s    eta 2m 45s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "D2560-E0_1-mem-ctx-   2%[                    ]  43.20M  10.5MB/s    eta 2m 45s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "2560-E0_1-mem-ctx-5   3%[                    ]  47.09M  10.9MB/s    eta 2m 11s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "560-E0_1-mem-ctx-51   3%[                    ]  50.88M  11.2MB/s    eta 2m 11s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "60-E0_1-mem-ctx-512   3%[                    ]  54.60M  12.0MB/s    eta 2m 11s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0-E0_1-mem-ctx-512.   3%[                    ]  58.48M  12.8MB/s    eta 2m 11s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-E0_1-mem-ctx-512.p   4%[                    ]  62.38M  13.6MB/s    eta 2m 11s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "E0_1-mem-ctx-512.pt   4%[                    ]  66.21M  14.4MB/s    eta 1m 56s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0_1-mem-ctx-512.pth   4%[                    ]  70.13M  15.2MB/s    eta 1m 56s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "_1-mem-ctx-512.pth    5%[>                   ]  73.90M  15.9MB/s    eta 1m 56s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "1-mem-ctx-512.pth     5%[>                   ]  77.82M  16.5MB/s    eta 1m 56s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-mem-ctx-512.pth      5%[>                   ]  81.73M  16.8MB/s    eta 1m 56s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "mem-ctx-512.pth       5%[>                   ]  85.54M  16.8MB/s    eta 1m 47s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "em-ctx-512.pth        6%[>                   ]  89.46M  16.8MB/s    eta 1m 47s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "m-ctx-512.pth         6%[>                   ]  93.34M  16.8MB/s    eta 1m 47s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-ctx-512.pth          6%[>                   ]  97.13M  16.8MB/s    eta 1m 47s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "ctx-512.pth           6%[>                   ] 101.04M  16.8MB/s    eta 1m 47s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "tx-512.pth            7%[>                   ] 104.82M  16.8MB/s    eta 1m 41s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "x-512.pth             7%[>                   ] 108.63M  16.8MB/s    eta 1m 41s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-512.pth              7%[>                   ] 112.42M  16.8MB/s    eta 1m 41s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "512.pth               7%[>                   ] 116.21M  16.8MB/s    eta 1m 41s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "12.pth                7%[>                   ] 117.20M  16.1MB/s    eta 1m 41s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "2.pth                 8%[>                   ] 120.17M  15.9MB/s    eta 1m 40s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      ".pth                  8%[>                   ] 123.92M  15.9MB/s    eta 1m 40s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "pth                   8%[>                   ] 127.67M  15.9MB/s    eta 1m 40s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "th                    8%[>                   ] 130.81M  15.8MB/s    eta 1m 40s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "h                     9%[>                   ] 134.63M  15.8MB/s    eta 1m 40s "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                      9%[>                   ] 138.42M  15.8MB/s    eta 97s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                  v   9%[>                   ] 142.31M  15.7MB/s    eta 97s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                 v5   9%[>                   ] 145.38M  15.6MB/s    eta 97s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                v5r  10%[=>                  ] 149.13M  15.6MB/s    eta 97s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "               v5r3  10%[=>                  ] 152.09M  15.4MB/s    eta 97s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "              v5r3-  10%[=>                  ] 155.88M  15.3MB/s    eta 94s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "             v5r3-L  10%[=>                  ] 159.67M  15.3MB/s    eta 94s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "            v5r3-L6  11%[=>                  ] 162.78M  15.1MB/s    eta 94s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "           v5r3-L6-  11%[=>                  ] 166.62M  15.2MB/s    eta 94s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "          v5r3-L6-D  11%[=>                  ] 170.42M  15.1MB/s    eta 94s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "         v5r3-L6-D2  11%[=>                  ] 174.28M  15.1MB/s    eta 92s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "        v5r3-L6-D25  12%[=>                  ] 178.38M  15.2MB/s    eta 92s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "       v5r3-L6-D256  12%[=>                  ] 182.15M  15.2MB/s    eta 92s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "      v5r3-L6-D2560  12%[=>                  ] 185.90M  15.2MB/s    eta 92s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "     v5r3-L6-D2560-  12%[=>                  ] 189.65M  15.8MB/s    eta 92s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "    v5r3-L6-D2560-E  13%[=>                  ] 193.63M  16.0MB/s    eta 89s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "   v5r3-L6-D2560-E0  13%[=>                  ] 197.46M  16.0MB/s    eta 89s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "  v5r3-L6-D2560-E0_  13%[=>                  ] 199.09M  15.5MB/s    eta 89s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      " v5r3-L6-D2560-E0_1  13%[=>                  ] 202.45M  15.5MB/s    eta 89s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "v5r3-L6-D2560-E0_1-  14%[=>                  ] 206.15M  15.6MB/s    eta 89s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "5r3-L6-D2560-E0_1-m  14%[=>                  ] 209.87M  15.5MB/s    eta 88s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "r3-L6-D2560-E0_1-me  14%[=>                  ] 213.65M  15.5MB/s    eta 88s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "3-L6-D2560-E0_1-mem  14%[=>                  ] 217.40M  15.7MB/s    eta 88s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-L6-D2560-E0_1-mem-  15%[==>                 ] 221.12M  15.7MB/s    eta 88s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "L6-D2560-E0_1-mem-c  15%[==>                 ] 224.95M  15.8MB/s    eta 88s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "6-D2560-E0_1-mem-ct  15%[==>                 ] 228.85M  15.9MB/s    eta 85s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-D2560-E0_1-mem-ctx  15%[==>                 ] 232.63M  15.9MB/s    eta 85s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "D2560-E0_1-mem-ctx-  16%[==>                 ] 236.48M  16.0MB/s    eta 85s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "2560-E0_1-mem-ctx-5  16%[==>                 ] 240.31M  16.0MB/s    eta 85s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "560-E0_1-mem-ctx-51  16%[==>                 ] 244.15M  16.0MB/s    eta 85s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "60-E0_1-mem-ctx-512  16%[==>                 ] 247.99M  16.0MB/s    eta 83s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0-E0_1-mem-ctx-512.  17%[==>                 ] 251.73M  15.9MB/s    eta 83s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-E0_1-mem-ctx-512.p  17%[==>                 ] 255.59M  16.0MB/s    eta 83s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "E0_1-mem-ctx-512.pt  17%[==>                 ] 259.38M  16.0MB/s    eta 83s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0_1-mem-ctx-512.pth  17%[==>                 ] 263.13M  16.0MB/s    eta 83s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "_1-mem-ctx-512.pth   18%[==>                 ] 266.95M  15.9MB/s    eta 81s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "1-mem-ctx-512.pth    18%[==>                 ] 270.76M  15.9MB/s    eta 81s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-mem-ctx-512.pth     18%[==>                 ] 274.51M  16.4MB/s    eta 81s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "mem-ctx-512.pth      18%[==>                 ] 278.38M  16.5MB/s    eta 81s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "em-ctx-512.pth       19%[==>                 ] 282.12M  16.5MB/s    eta 81s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "m-ctx-512.pth        19%[==>                 ] 285.85M  16.5MB/s    eta 79s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-ctx-512.pth         19%[==>                 ] 289.57M  16.5MB/s    eta 79s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "ctx-512.pth          20%[===>                ] 293.38M  16.5MB/s    eta 79s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "tx-512.pth           20%[===>                ] 297.13M  16.6MB/s    eta 79s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "x-512.pth            20%[===>                ] 300.90M  16.5MB/s    eta 79s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-512.pth             20%[===>                ] 304.67M  16.5MB/s    eta 78s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "512.pth              21%[===>                ] 308.38M  16.5MB/s    eta 78s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "12.pth               21%[===>                ] 312.13M  16.5MB/s    eta 78s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "2.pth                21%[===>                ] 316.20M  16.5MB/s    eta 78s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      ".pth                 21%[===>                ] 319.92M  16.5MB/s    eta 78s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "pth                  22%[===>                ] 323.67M  16.5MB/s    eta 76s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "th                   22%[===>                ] 327.51M  16.5MB/s    eta 76s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "h                    22%[===>                ] 331.24M  16.5MB/s    eta 76s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                     22%[===>                ] 335.06M  16.5MB/s    eta 76s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                  v  23%[===>                ] 338.79M  16.5MB/s    eta 76s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                 v5  23%[===>                ] 342.60M  16.5MB/s    eta 74s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                v5r  23%[===>                ] 346.38M  16.5MB/s    eta 74s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "               v5r3  23%[===>                ] 350.20M  16.5MB/s    eta 74s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "              v5r3-  24%[===>                ] 354.12M  16.5MB/s    eta 74s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "             v5r3-L  24%[===>                ] 357.88M  16.5MB/s    eta 74s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "            v5r3-L6  24%[===>                ] 361.63M  16.5MB/s    eta 73s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "           v5r3-L6-  24%[===>                ] 365.38M  16.5MB/s    eta 73s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "          v5r3-L6-D  25%[====>               ] 369.10M  16.5MB/s    eta 73s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "         v5r3-L6-D2  25%[====>               ] 372.92M  16.5MB/s    eta 73s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "        v5r3-L6-D25  25%[====>               ] 376.79M  16.5MB/s    eta 73s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "       v5r3-L6-D256  25%[====>               ] 380.63M  16.5MB/s    eta 71s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "      v5r3-L6-D2560  26%[====>               ] 384.38M  16.5MB/s    eta 71s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "     v5r3-L6-D2560-  26%[====>               ] 387.63M  16.4MB/s    eta 71s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "    v5r3-L6-D2560-E  26%[====>               ] 388.07M  15.7MB/s    eta 71s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "   v5r3-L6-D2560-E0  26%[====>               ] 391.90M  15.7MB/s    eta 71s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "  v5r3-L6-D2560-E0_  26%[====>               ] 394.49M  15.5MB/s    eta 71s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      " v5r3-L6-D2560-E0_1  27%[====>               ] 397.45M  15.3MB/s    eta 71s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "v5r3-L6-D2560-E0_1-  27%[====>               ] 400.42M  15.1MB/s    eta 71s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "5r3-L6-D2560-E0_1-m  27%[====>               ] 403.48M  15.0MB/s    eta 71s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "r3-L6-D2560-E0_1-me  27%[====>               ] 406.03M  14.7MB/s    eta 71s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "3-L6-D2560-E0_1-mem  27%[====>               ] 409.13M  14.6MB/s    eta 70s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-L6-D2560-E0_1-mem-  28%[====>               ] 412.31M  14.4MB/s    eta 70s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "L6-D2560-E0_1-mem-c  28%[====>               ] 415.51M  14.3MB/s    eta 70s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "6-D2560-E0_1-mem-ct  28%[====>               ] 418.73M  14.2MB/s    eta 70s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-D2560-E0_1-mem-ctx  28%[====>               ] 421.73M  14.0MB/s    eta 70s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "D2560-E0_1-mem-ctx-  28%[====>               ] 424.85M  13.8MB/s    eta 70s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "2560-E0_1-mem-ctx-5  29%[====>               ] 428.06M  13.7MB/s    eta 70s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "560-E0_1-mem-ctx-51  29%[====>               ] 431.29M  13.6MB/s    eta 70s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "60-E0_1-mem-ctx-512  29%[====>               ] 434.60M  13.5MB/s    eta 70s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0-E0_1-mem-ctx-512.  29%[====>               ] 437.81M  13.4MB/s    eta 70s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-E0_1-mem-ctx-512.p  30%[=====>              ] 441.10M  13.3MB/s    eta 69s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "E0_1-mem-ctx-512.pt  30%[=====>              ] 444.49M  13.2MB/s    eta 69s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0_1-mem-ctx-512.pth  30%[=====>              ] 447.85M  13.2MB/s    eta 69s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "_1-mem-ctx-512.pth   30%[=====>              ] 451.23M  13.8MB/s    eta 69s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "1-mem-ctx-512.pth    31%[=====>              ] 454.65M  13.8MB/s    eta 69s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-mem-ctx-512.pth     31%[=====>              ] 458.06M  13.9MB/s    eta 68s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "mem-ctx-512.pth      31%[=====>              ] 461.48M  14.0MB/s    eta 68s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "em-ctx-512.pth       31%[=====>              ] 464.90M  14.2MB/s    eta 68s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "m-ctx-512.pth        31%[=====>              ] 468.31M  14.2MB/s    eta 68s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-ctx-512.pth         32%[=====>              ] 471.76M  14.4MB/s    eta 68s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "ctx-512.pth          32%[=====>              ] 475.26M  14.4MB/s    eta 66s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "tx-512.pth           32%[=====>              ] 477.73M  14.3MB/s    eta 66s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "x-512.pth            32%[=====>              ] 481.17M  14.3MB/s    eta 66s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-512.pth             33%[=====>              ] 484.62M  14.4MB/s    eta 66s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "512.pth              33%[=====>              ] 488.09M  14.5MB/s    eta 66s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "12.pth               33%[=====>              ] 491.59M  14.6MB/s    eta 65s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "2.pth                33%[=====>              ] 495.10M  14.7MB/s    eta 65s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      ".pth                 33%[=====>              ] 498.07M  14.6MB/s    eta 65s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "pth                  34%[=====>              ] 501.29M  14.6MB/s    eta 65s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "th                   34%[=====>              ] 504.70M  14.6MB/s    eta 65s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "h                    34%[=====>              ] 508.23M  14.6MB/s    eta 64s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                     34%[=====>              ] 511.73M  14.7MB/s    eta 64s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                  v  35%[======>             ] 515.20M  14.7MB/s    eta 64s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                 v5  35%[======>             ] 518.76M  14.8MB/s    eta 64s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                v5r  35%[======>             ] 521.78M  14.7MB/s    eta 64s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "               v5r3  35%[======>             ] 525.34M  14.7MB/s    eta 63s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "              v5r3-  36%[======>             ] 528.79M  14.7MB/s    eta 63s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "             v5r3-L  36%[======>             ] 532.29M  14.7MB/s    eta 63s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "            v5r3-L6  36%[======>             ] 535.76M  14.7MB/s    eta 63s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "           v5r3-L6-  36%[======>             ] 539.28M  14.7MB/s    eta 63s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "          v5r3-L6-D  36%[======>             ] 541.87M  14.6MB/s    eta 62s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "         v5r3-L6-D2  37%[======>             ] 545.32M  14.8MB/s    eta 62s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "        v5r3-L6-D25  37%[======>             ] 548.84M  14.8MB/s    eta 62s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "       v5r3-L6-D256  37%[======>             ] 552.34M  14.9MB/s    eta 62s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "      v5r3-L6-D2560  37%[======>             ] 555.78M  14.8MB/s    eta 62s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "     v5r3-L6-D2560-  38%[======>             ] 559.32M  14.9MB/s    eta 61s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "    v5r3-L6-D2560-E  38%[======>             ] 561.92M  14.6MB/s    eta 61s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "   v5r3-L6-D2560-E0  38%[======>             ] 565.42M  14.8MB/s    eta 61s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "  v5r3-L6-D2560-E0_  38%[======>             ] 568.92M  14.9MB/s    eta 61s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      " v5r3-L6-D2560-E0_1  39%[======>             ] 572.49M  14.9MB/s    eta 61s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "v5r3-L6-D2560-E0_1-  39%[======>             ] 575.17M  14.7MB/s    eta 60s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "5r3-L6-D2560-E0_1-m  39%[======>             ] 578.63M  14.7MB/s    eta 60s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "r3-L6-D2560-E0_1-me  39%[======>             ] 582.20M  14.7MB/s    eta 60s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "3-L6-D2560-E0_1-mem  39%[======>             ] 585.71M  14.7MB/s    eta 60s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-L6-D2560-E0_1-mem-  40%[=======>            ] 589.18M  14.8MB/s    eta 60s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "L6-D2560-E0_1-mem-c  40%[=======>            ] 592.70M  14.8MB/s    eta 59s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "6-D2560-E0_1-mem-ct  40%[=======>            ] 596.18M  14.8MB/s    eta 59s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-D2560-E0_1-mem-ctx  40%[=======>            ] 599.78M  14.8MB/s    eta 59s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "D2560-E0_1-mem-ctx-  41%[=======>            ] 602.70M  14.6MB/s    eta 59s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "2560-E0_1-mem-ctx-5  41%[=======>            ] 606.18M  14.6MB/s    eta 59s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "560-E0_1-mem-ctx-51  41%[=======>            ] 609.79M  14.8MB/s    eta 58s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "60-E0_1-mem-ctx-512  41%[=======>            ] 613.31M  14.8MB/s    eta 58s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0-E0_1-mem-ctx-512.  42%[=======>            ] 615.92M  14.6MB/s    eta 58s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-E0_1-mem-ctx-512.p  42%[=======>            ] 619.49M  14.6MB/s    eta 58s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "E0_1-mem-ctx-512.pt  42%[=======>            ] 623.07M  14.7MB/s    eta 58s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0_1-mem-ctx-512.pth  42%[=======>            ] 626.73M  14.7MB/s    eta 56s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "_1-mem-ctx-512.pth   42%[=======>            ] 630.40M  14.9MB/s    eta 56s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "1-mem-ctx-512.pth    43%[=======>            ] 634.06M  15.0MB/s    eta 56s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-mem-ctx-512.pth     43%[=======>            ] 637.68M  15.0MB/s    eta 56s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "mem-ctx-512.pth      43%[=======>            ] 641.40M  15.0MB/s    eta 56s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "em-ctx-512.pth       43%[=======>            ] 645.04M  15.3MB/s    eta 55s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "m-ctx-512.pth        44%[=======>            ] 648.71M  15.3MB/s    eta 55s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-ctx-512.pth         44%[=======>            ] 652.49M  15.4MB/s    eta 55s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "ctx-512.pth          44%[=======>            ] 656.26M  15.4MB/s    eta 55s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "tx-512.pth           45%[========>           ] 660.07M  15.3MB/s    eta 55s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "x-512.pth            45%[========>           ] 663.84M  15.4MB/s    eta 54s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-512.pth             45%[========>           ] 667.57M  15.4MB/s    eta 54s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "512.pth              45%[========>           ] 671.35M  15.4MB/s    eta 54s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "12.pth               46%[========>           ] 675.13M  15.7MB/s    eta 54s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "2.pth                46%[========>           ] 678.79M  15.8MB/s    eta 54s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      ".pth                 46%[========>           ] 682.62M  15.8MB/s    eta 52s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "pth                  46%[========>           ] 686.34M  15.8MB/s    eta 52s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "th                   47%[========>           ] 690.13M  16.1MB/s    eta 52s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "h                    47%[========>           ] 693.85M  16.1MB/s    eta 52s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                     47%[========>           ] 697.71M  16.2MB/s    eta 52s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                  v  47%[========>           ] 701.46M  16.2MB/s    eta 51s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                 v5  48%[========>           ] 705.26M  16.2MB/s    eta 51s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                v5r  48%[========>           ] 709.01M  16.2MB/s    eta 51s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "               v5r3  48%[========>           ] 712.85M  16.2MB/s    eta 51s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "              v5r3-  48%[========>           ] 716.67M  16.2MB/s    eta 51s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "             v5r3-L  49%[========>           ] 719.21M  16.0MB/s    eta 50s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "            v5r3-L6  49%[========>           ] 722.99M  16.0MB/s    eta 50s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "           v5r3-L6-  49%[========>           ] 726.73M  16.0MB/s    eta 50s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "          v5r3-L6-D  49%[========>           ] 730.45M  16.0MB/s    eta 50s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "         v5r3-L6-D2  50%[=========>          ] 734.38M  16.2MB/s    eta 50s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "        v5r3-L6-D25  50%[=========>          ] 738.12M  16.2MB/s    eta 48s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "       v5r3-L6-D256  50%[=========>          ] 741.13M  16.0MB/s    eta 48s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "      v5r3-L6-D2560  50%[=========>          ] 741.59M  15.3MB/s    eta 48s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "     v5r3-L6-D2560-  50%[=========>          ] 745.43M  15.3MB/s    eta 48s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "    v5r3-L6-D2560-E  51%[=========>          ] 747.93M  15.0MB/s    eta 48s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "   v5r3-L6-D2560-E0  51%[=========>          ] 750.78M  14.8MB/s    eta 48s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "  v5r3-L6-D2560-E0_  51%[=========>          ] 752.87M  14.4MB/s    eta 48s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      " v5r3-L6-D2560-E0_1  51%[=========>          ] 755.03M  14.1MB/s    eta 48s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "v5r3-L6-D2560-E0_1-  51%[=========>          ] 757.21M  13.8MB/s    eta 48s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "5r3-L6-D2560-E0_1-m  51%[=========>          ] 759.43M  13.4MB/s    eta 48s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "r3-L6-D2560-E0_1-me  51%[=========>          ] 761.67M  13.1MB/s    eta 48s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "3-L6-D2560-E0_1-mem  52%[=========>          ] 763.93M  12.8MB/s    eta 48s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-L6-D2560-E0_1-mem-  52%[=========>          ] 766.23M  12.5MB/s    eta 48s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "L6-D2560-E0_1-mem-c  52%[=========>          ] 768.54M  12.1MB/s    eta 48s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "6-D2560-E0_1-mem-ct  52%[=========>          ] 770.87M  11.8MB/s    eta 48s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-D2560-E0_1-mem-ctx  52%[=========>          ] 773.23M  11.8MB/s    eta 47s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "D2560-E0_1-mem-ctx-  52%[=========>          ] 775.60M  11.5MB/s    eta 47s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "2560-E0_1-mem-ctx-5  53%[=========>          ] 777.99M  11.2MB/s    eta 47s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "560-E0_1-mem-ctx-51  53%[=========>          ] 780.40M  10.9MB/s    eta 47s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "60-E0_1-mem-ctx-512  53%[=========>          ] 782.82M  10.6MB/s    eta 47s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0-E0_1-mem-ctx-512.  53%[=========>          ] 785.24M  10.3MB/s    eta 46s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-E0_1-mem-ctx-512.p  53%[=========>          ] 787.70M  10.2MB/s    eta 46s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "E0_1-mem-ctx-512.pt  53%[=========>          ] 790.15M  10.6MB/s    eta 46s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0_1-mem-ctx-512.pth  54%[=========>          ] 792.63M  10.3MB/s    eta 46s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "_1-mem-ctx-512.pth   54%[=========>          ] 795.10M  10.4MB/s    eta 46s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "1-mem-ctx-512.pth    54%[=========>          ] 797.60M  10.3MB/s    eta 46s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-mem-ctx-512.pth     54%[=========>          ] 800.10M  10.4MB/s    eta 46s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "mem-ctx-512.pth      54%[=========>          ] 802.60M  10.5MB/s    eta 46s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "em-ctx-512.pth       54%[=========>          ] 805.12M  10.6MB/s    eta 46s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "m-ctx-512.pth        55%[==========>         ] 807.63M  10.6MB/s    eta 46s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-ctx-512.pth         55%[==========>         ] 810.17M  10.7MB/s    eta 45s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "ctx-512.pth          55%[==========>         ] 812.70M  10.8MB/s    eta 45s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "tx-512.pth           55%[==========>         ] 815.23M  10.8MB/s    eta 45s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "x-512.pth            55%[==========>         ] 817.76M  10.9MB/s    eta 45s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-512.pth             55%[==========>         ] 820.31M  10.9MB/s    eta 45s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "512.pth              56%[==========>         ] 822.85M  11.0MB/s    eta 45s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "12.pth               56%[==========>         ] 825.40M  11.0MB/s    eta 45s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "2.pth                56%[==========>         ] 827.95M  11.0MB/s    eta 45s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      ".pth                 56%[==========>         ] 830.48M  11.1MB/s    eta 45s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "pth                  56%[==========>         ] 833.04M  11.1MB/s    eta 45s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "th                   56%[==========>         ] 835.59M  11.1MB/s    eta 44s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "h                    57%[==========>         ] 838.15M  11.2MB/s    eta 44s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                     57%[==========>         ] 840.71M  11.2MB/s    eta 44s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                  v  57%[==========>         ] 843.26M  11.2MB/s    eta 44s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                 v5  57%[==========>         ] 845.81M  11.2MB/s    eta 44s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                v5r  57%[==========>         ] 848.37M  11.2MB/s    eta 43s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "               v5r3  58%[==========>         ] 850.92M  11.2MB/s    eta 43s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "              v5r3-  58%[==========>         ] 853.46M  11.2MB/s    eta 43s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "             v5r3-L  58%[==========>         ] 856.03M  11.3MB/s    eta 43s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "            v5r3-L6  58%[==========>         ] 858.57M  11.3MB/s    eta 43s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "           v5r3-L6-  58%[==========>         ] 861.12M  11.3MB/s    eta 42s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "          v5r3-L6-D  58%[==========>         ] 863.68M  11.3MB/s    eta 42s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "         v5r3-L6-D2  59%[==========>         ] 866.23M  11.3MB/s    eta 42s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "        v5r3-L6-D25  59%[==========>         ] 868.79M  11.3MB/s    eta 42s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "       v5r3-L6-D256  59%[==========>         ] 871.34M  11.3MB/s    eta 42s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "      v5r3-L6-D2560  59%[==========>         ] 873.90M  11.1MB/s    eta 42s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "     v5r3-L6-D2560-  59%[==========>         ] 876.43M  10.8MB/s    eta 42s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "    v5r3-L6-D2560-E  59%[==========>         ] 878.99M  11.3MB/s    eta 42s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "   v5r3-L6-D2560-E0  60%[===========>        ] 881.56M  11.1MB/s    eta 42s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "  v5r3-L6-D2560-E0_  60%[===========>        ] 883.37M  10.7MB/s    eta 42s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      " v5r3-L6-D2560-E0_1  60%[===========>        ] 887.10M  10.9MB/s    eta 41s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "v5r3-L6-D2560-E0_1-  60%[===========>        ] 888.93M  10.6MB/s    eta 41s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "5r3-L6-D2560-E0_1-m  60%[===========>        ] 890.81M  10.9MB/s    eta 41s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "r3-L6-D2560-E0_1-me  60%[===========>        ] 892.73M  10.6MB/s    eta 41s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "3-L6-D2560-E0_1-mem  61%[===========>        ] 894.70M  10.3MB/s    eta 41s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-L6-D2560-E0_1-mem-  61%[===========>        ] 896.70M  10.2MB/s    eta 41s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "L6-D2560-E0_1-mem-c  61%[===========>        ] 898.54M  10.2MB/s    eta 41s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "6-D2560-E0_1-mem-ct  61%[===========>        ] 899.99M  9.85MB/s    eta 41s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-D2560-E0_1-mem-ctx  61%[===========>        ] 902.09M  9.77MB/s    eta 41s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "D2560-E0_1-mem-ctx-  61%[===========>        ] 904.21M  9.95MB/s    eta 41s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "2560-E0_1-mem-ctx-5  61%[===========>        ] 906.38M  9.73MB/s    eta 40s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "560-E0_1-mem-ctx-51  61%[===========>        ] 908.57M  9.56MB/s    eta 40s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "60-E0_1-mem-ctx-512  62%[===========>        ] 910.79M  9.35MB/s    eta 40s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0-E0_1-mem-ctx-512.  62%[===========>        ] 913.04M  9.35MB/s    eta 40s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-E0_1-mem-ctx-512.p  62%[===========>        ] 915.31M  9.43MB/s    eta 40s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "E0_1-mem-ctx-512.pt  62%[===========>        ] 917.60M  9.34MB/s    eta 40s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0_1-mem-ctx-512.pth  62%[===========>        ] 919.93M  9.62MB/s    eta 40s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "_1-mem-ctx-512.pth   62%[===========>        ] 922.26M  9.32MB/s    eta 40s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "1-mem-ctx-512.pth    63%[===========>        ] 924.62M  9.54MB/s    eta 40s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-mem-ctx-512.pth     63%[===========>        ] 927.01M  9.54MB/s    eta 40s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "mem-ctx-512.pth      63%[===========>        ] 929.40M  9.66MB/s    eta 39s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "em-ctx-512.pth       63%[===========>        ] 931.81M  9.83MB/s    eta 39s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "m-ctx-512.pth        63%[===========>        ] 934.23M  10.0MB/s    eta 39s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-ctx-512.pth         63%[===========>        ] 936.68M  9.99MB/s    eta 39s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "ctx-512.pth          64%[===========>        ] 939.13M  10.1MB/s    eta 39s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "tx-512.pth           64%[===========>        ] 941.60M  10.1MB/s    eta 38s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "x-512.pth            64%[===========>        ] 944.09M  10.3MB/s    eta 38s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-512.pth             64%[===========>        ] 946.57M  10.5MB/s    eta 38s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "512.pth              64%[===========>        ] 949.07M  10.3MB/s    eta 38s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "12.pth               64%[===========>        ] 951.57M  10.5MB/s    eta 38s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "2.pth                65%[============>       ] 954.09M  10.7MB/s    eta 37s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      ".pth                 65%[============>       ] 956.54M  10.8MB/s    eta 37s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "pth                  65%[============>       ] 959.06M  10.6MB/s    eta 37s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "th                   65%[============>       ] 961.60M  10.6MB/s    eta 37s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "h                    65%[============>       ] 964.13M  10.8MB/s    eta 37s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                     65%[============>       ] 966.70M  10.9MB/s    eta 36s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                  v  66%[============>       ] 969.24M  10.8MB/s    eta 36s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                 v5  66%[============>       ] 971.79M  10.7MB/s    eta 36s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                v5r  66%[============>       ] 974.35M  10.9MB/s    eta 36s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "               v5r3  66%[============>       ] 976.92M  11.1MB/s    eta 36s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "              v5r3-  66%[============>       ] 979.48M  11.1MB/s    eta 36s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "             v5r3-L  66%[============>       ] 982.04M  10.8MB/s    eta 36s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "            v5r3-L6  67%[============>       ] 984.60M  11.0MB/s    eta 36s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "           v5r3-L6-  67%[============>       ] 987.18M  11.2MB/s    eta 36s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "          v5r3-L6-D  67%[============>       ] 989.74M  11.1MB/s    eta 36s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "         v5r3-L6-D2  67%[============>       ] 992.31M  11.1MB/s    eta 35s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "        v5r3-L6-D25  67%[============>       ] 994.88M  11.2MB/s    eta 35s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "       v5r3-L6-D256  68%[============>       ] 997.45M  11.1MB/s    eta 35s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "      v5r3-L6-D2560  68%[============>       ]   1000M  11.1MB/s    eta 35s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "     v5r3-L6-D2560-  68%[============>       ]   1003M  11.3MB/s    eta 35s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "    v5r3-L6-D2560-E  68%[============>       ]   1005M  11.2MB/s    eta 34s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "   v5r3-L6-D2560-E0  68%[============>       ]   1008M  11.3MB/s    eta 34s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "  v5r3-L6-D2560-E0_  68%[============>       ]   1010M  11.2MB/s    eta 34s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      " v5r3-L6-D2560-E0_1  69%[============>       ]   1013M  11.2MB/s    eta 34s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "v5r3-L6-D2560-E0_1-  69%[============>       ]   1015M  11.2MB/s    eta 34s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "5r3-L6-D2560-E0_1-m  69%[============>       ]   1018M  11.2MB/s    eta 33s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "r3-L6-D2560-E0_1-me  69%[============>       ]   1021M  11.0MB/s    eta 33s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "3-L6-D2560-E0_1-mem  69%[============>       ]   1023M  11.2MB/s    eta 33s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-L6-D2560-E0_1-mem-  69%[============>       ]   1.00G  11.3MB/s    eta 33s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "L6-D2560-E0_1-mem-c  70%[=============>      ]   1.00G  11.2MB/s    eta 33s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "6-D2560-E0_1-mem-ct  70%[=============>      ]   1.01G  11.0MB/s    eta 32s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-D2560-E0_1-mem-ctx  70%[=============>      ]   1.01G  11.2MB/s    eta 32s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "D2560-E0_1-mem-ctx-  70%[=============>      ]   1.01G  11.2MB/s    eta 32s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "2560-E0_1-mem-ctx-5  70%[=============>      ]   1.01G  11.0MB/s    eta 32s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "560-E0_1-mem-ctx-51  70%[=============>      ]   1.02G  11.2MB/s    eta 32s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "60-E0_1-mem-ctx-512  71%[=============>      ]   1.02G  11.4MB/s    eta 31s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0-E0_1-mem-ctx-512.  71%[=============>      ]   1.02G  11.4MB/s    eta 31s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-E0_1-mem-ctx-512.p  71%[=============>      ]   1.02G  11.2MB/s    eta 31s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "E0_1-mem-ctx-512.pt  71%[=============>      ]   1.03G  11.3MB/s    eta 31s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0_1-mem-ctx-512.pth  71%[=============>      ]   1.03G  11.4MB/s    eta 31s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "_1-mem-ctx-512.pth   72%[=============>      ]   1.03G  11.4MB/s    eta 30s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "1-mem-ctx-512.pth    72%[=============>      ]   1.03G  11.4MB/s    eta 30s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-mem-ctx-512.pth     72%[=============>      ]   1.04G  11.4MB/s    eta 30s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "mem-ctx-512.pth      72%[=============>      ]   1.04G  11.3MB/s    eta 30s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "em-ctx-512.pth       72%[=============>      ]   1.04G  11.1MB/s    eta 30s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "m-ctx-512.pth        72%[=============>      ]   1.04G  11.5MB/s    eta 30s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-ctx-512.pth         73%[=============>      ]   1.05G  11.3MB/s    eta 30s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "ctx-512.pth          73%[=============>      ]   1.05G  11.2MB/s    eta 30s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "tx-512.pth           73%[=============>      ]   1.05G  11.4MB/s    eta 30s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "x-512.pth            73%[=============>      ]   1.05G  11.4MB/s    eta 30s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-512.pth             73%[=============>      ]   1.06G  11.3MB/s    eta 29s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "512.pth              74%[=============>      ]   1.06G  11.5MB/s    eta 29s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "12.pth               74%[=============>      ]   1.06G  11.7MB/s    eta 29s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "2.pth                74%[=============>      ]   1.07G  11.6MB/s    eta 29s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      ".pth                 74%[=============>      ]   1.07G  11.4MB/s    eta 29s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "pth                  74%[=============>      ]   1.07G  11.6MB/s    eta 28s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "th                   75%[==============>     ]   1.07G  11.9MB/s    eta 28s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "h                    75%[==============>     ]   1.08G  11.9MB/s    eta 28s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                     75%[==============>     ]   1.08G  11.9MB/s    eta 28s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                  v  75%[==============>     ]   1.08G  11.9MB/s    eta 28s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                 v5  75%[==============>     ]   1.08G  12.0MB/s    eta 27s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                v5r  75%[==============>     ]   1.09G  12.0MB/s    eta 27s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "               v5r3  76%[==============>     ]   1.09G  11.9MB/s    eta 27s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "              v5r3-  76%[==============>     ]   1.09G  12.2MB/s    eta 27s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "             v5r3-L  76%[==============>     ]   1.10G  12.4MB/s    eta 27s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "            v5r3-L6  76%[==============>     ]   1.10G  12.5MB/s    eta 25s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "           v5r3-L6-  77%[==============>     ]   1.10G  12.4MB/s    eta 25s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "          v5r3-L6-D  77%[==============>     ]   1.11G  12.3MB/s    eta 25s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "         v5r3-L6-D2  77%[==============>     ]   1.11G  12.7MB/s    eta 25s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "        v5r3-L6-D25  77%[==============>     ]   1.11G  12.9MB/s    eta 25s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "       v5r3-L6-D256  77%[==============>     ]   1.12G  13.0MB/s    eta 24s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "      v5r3-L6-D2560  78%[==============>     ]   1.12G  12.9MB/s    eta 24s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "     v5r3-L6-D2560-  78%[==============>     ]   1.12G  13.1MB/s    eta 24s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "    v5r3-L6-D2560-E  78%[==============>     ]   1.12G  13.2MB/s    eta 24s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "   v5r3-L6-D2560-E0  78%[==============>     ]   1.13G  13.1MB/s    eta 24s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "  v5r3-L6-D2560-E0_  79%[==============>     ]   1.13G  13.5MB/s    eta 23s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      " v5r3-L6-D2560-E0_1  79%[==============>     ]   1.14G  13.7MB/s    eta 23s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "v5r3-L6-D2560-E0_1-  79%[==============>     ]   1.14G  13.9MB/s    eta 23s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "5r3-L6-D2560-E0_1-m  79%[==============>     ]   1.14G  14.1MB/s    eta 23s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "r3-L6-D2560-E0_1-me  80%[===============>    ]   1.15G  14.3MB/s    eta 23s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "3-L6-D2560-E0_1-mem  80%[===============>    ]   1.15G  14.4MB/s    eta 21s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-L6-D2560-E0_1-mem-  80%[===============>    ]   1.15G  14.5MB/s    eta 21s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "L6-D2560-E0_1-mem-c  80%[===============>    ]   1.16G  14.7MB/s    eta 21s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "6-D2560-E0_1-mem-ct  81%[===============>    ]   1.16G  14.9MB/s    eta 21s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-D2560-E0_1-mem-ctx  81%[===============>    ]   1.17G  15.0MB/s    eta 21s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "D2560-E0_1-mem-ctx-  81%[===============>    ]   1.17G  15.2MB/s    eta 20s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "2560-E0_1-mem-ctx-5  81%[===============>    ]   1.17G  15.3MB/s    eta 20s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "560-E0_1-mem-ctx-51  82%[===============>    ]   1.18G  15.4MB/s    eta 20s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "60-E0_1-mem-ctx-512  82%[===============>    ]   1.18G  15.5MB/s    eta 20s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0-E0_1-mem-ctx-512.  82%[===============>    ]   1.18G  15.7MB/s    eta 20s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-E0_1-mem-ctx-512.p  82%[===============>    ]   1.19G  15.8MB/s    eta 19s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "E0_1-mem-ctx-512.pt  83%[===============>    ]   1.19G  16.1MB/s    eta 19s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0_1-mem-ctx-512.pth  83%[===============>    ]   1.19G  16.2MB/s    eta 19s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "_1-mem-ctx-512.pth   83%[===============>    ]   1.20G  16.3MB/s    eta 19s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "1-mem-ctx-512.pth    83%[===============>    ]   1.20G  16.4MB/s    eta 19s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-mem-ctx-512.pth     84%[===============>    ]   1.21G  16.4MB/s    eta 17s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "mem-ctx-512.pth      84%[===============>    ]   1.21G  16.4MB/s    eta 17s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "em-ctx-512.pth       84%[===============>    ]   1.21G  16.4MB/s    eta 17s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "m-ctx-512.pth        84%[===============>    ]   1.22G  16.4MB/s    eta 17s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-ctx-512.pth         85%[================>   ]   1.22G  16.5MB/s    eta 17s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "ctx-512.pth          85%[================>   ]   1.22G  16.5MB/s    eta 16s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "tx-512.pth           85%[================>   ]   1.23G  16.5MB/s    eta 16s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "x-512.pth            86%[================>   ]   1.23G  16.5MB/s    eta 16s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-512.pth             86%[================>   ]   1.24G  16.5MB/s    eta 16s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "512.pth              86%[================>   ]   1.24G  16.5MB/s    eta 16s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "12.pth               86%[================>   ]   1.24G  16.5MB/s    eta 14s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "2.pth                87%[================>   ]   1.25G  16.5MB/s    eta 14s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      ".pth                 87%[================>   ]   1.25G  16.5MB/s    eta 14s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "pth                  87%[================>   ]   1.25G  16.5MB/s    eta 14s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "th                   87%[================>   ]   1.26G  16.6MB/s    eta 14s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "h                    88%[================>   ]   1.26G  16.5MB/s    eta 13s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                     88%[================>   ]   1.26G  16.5MB/s    eta 13s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                  v  88%[================>   ]   1.27G  16.5MB/s    eta 13s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                 v5  88%[================>   ]   1.27G  16.5MB/s    eta 13s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                v5r  89%[================>   ]   1.28G  16.5MB/s    eta 13s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "               v5r3  89%[================>   ]   1.28G  16.5MB/s    eta 11s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "              v5r3-  89%[================>   ]   1.28G  16.5MB/s    eta 11s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "             v5r3-L  89%[================>   ]   1.28G  16.0MB/s    eta 11s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "            v5r3-L6  89%[================>   ]   1.29G  15.8MB/s    eta 11s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "           v5r3-L6-  90%[=================>  ]   1.29G  15.6MB/s    eta 11s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "          v5r3-L6-D  90%[=================>  ]   1.29G  15.5MB/s    eta 10s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "         v5r3-L6-D2  90%[=================>  ]   1.30G  15.3MB/s    eta 10s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "        v5r3-L6-D25  90%[=================>  ]   1.30G  15.2MB/s    eta 10s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "       v5r3-L6-D256  90%[=================>  ]   1.30G  15.1MB/s    eta 10s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "      v5r3-L6-D2560  91%[=================>  ]   1.31G  15.0MB/s    eta 10s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "     v5r3-L6-D2560-  91%[=================>  ]   1.31G  14.9MB/s    eta 9s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "    v5r3-L6-D2560-E  91%[=================>  ]   1.31G  14.8MB/s    eta 9s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "   v5r3-L6-D2560-E0  91%[=================>  ]   1.32G  14.7MB/s    eta 9s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "  v5r3-L6-D2560-E0_  92%[=================>  ]   1.32G  14.6MB/s    eta 9s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      " v5r3-L6-D2560-E0_1  92%[=================>  ]   1.32G  14.6MB/s    eta 9s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "v5r3-L6-D2560-E0_1-  92%[=================>  ]   1.33G  14.3MB/s    eta 8s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "5r3-L6-D2560-E0_1-m  92%[=================>  ]   1.33G  14.3MB/s    eta 8s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "r3-L6-D2560-E0_1-me  93%[=================>  ]   1.33G  14.3MB/s    eta 8s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "3-L6-D2560-E0_1-mem  93%[=================>  ]   1.34G  14.3MB/s    eta 8s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-L6-D2560-E0_1-mem-  93%[=================>  ]   1.34G  14.2MB/s    eta 8s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "L6-D2560-E0_1-mem-c  93%[=================>  ]   1.34G  14.2MB/s    eta 7s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "6-D2560-E0_1-mem-ct  94%[=================>  ]   1.35G  14.2MB/s    eta 7s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-D2560-E0_1-mem-ctx  94%[=================>  ]   1.35G  14.8MB/s    eta 7s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "D2560-E0_1-mem-ctx-  94%[=================>  ]   1.35G  15.0MB/s    eta 7s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "2560-E0_1-mem-ctx-5  94%[=================>  ]   1.36G  15.2MB/s    eta 7s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "560-E0_1-mem-ctx-51  95%[==================> ]   1.36G  15.3MB/s    eta 5s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "60-E0_1-mem-ctx-512  95%[==================> ]   1.37G  15.4MB/s    eta 5s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0-E0_1-mem-ctx-512.  95%[==================> ]   1.37G  15.5MB/s    eta 5s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-E0_1-mem-ctx-512.p  95%[==================> ]   1.37G  15.7MB/s    eta 5s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "E0_1-mem-ctx-512.pt  96%[==================> ]   1.38G  15.8MB/s    eta 5s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0_1-mem-ctx-512.pth  96%[==================> ]   1.38G  15.9MB/s    eta 4s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "_1-mem-ctx-512.pth   96%[==================> ]   1.38G  16.0MB/s    eta 4s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "1-mem-ctx-512.pth    96%[==================> ]   1.39G  16.0MB/s    eta 4s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-mem-ctx-512.pth     97%[==================> ]   1.39G  16.2MB/s    eta 4s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "mem-ctx-512.pth      97%[==================> ]   1.40G  16.2MB/s    eta 4s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "em-ctx-512.pth       97%[==================> ]   1.40G  16.4MB/s    eta 2s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "m-ctx-512.pth        97%[==================> ]   1.40G  16.5MB/s    eta 2s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-ctx-512.pth         98%[==================> ]   1.41G  16.5MB/s    eta 2s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "ctx-512.pth          98%[==================> ]   1.41G  16.5MB/s    eta 2s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "tx-512.pth           98%[==================> ]   1.41G  16.5MB/s    eta 2s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "x-512.pth            99%[==================> ]   1.42G  16.6MB/s    eta 1s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-512.pth             99%[==================> ]   1.42G  16.5MB/s    eta 1s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "512.pth              99%[==================> ]   1.42G  16.5MB/s    eta 1s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "12.pth               99%[==================> ]   1.43G  16.5MB/s    eta 1s     "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "v5r3-L6-D2560-E0_1- 100%[===================>]   1.43G  17.1MB/s    in 1m 46s  \r\n",
+      "\r\n",
+      "2023-09-14 00:23:39 (13.9 MB/s) - ‘v5r3-L6-D2560-E0_1-mem-ctx-512.pth’ saved [1537632513/1537632513]\r\n",
+      "\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "total 1.5G\r\n",
+      "drwxr-xr-x  2 root root    3 Sep 14 00:21 .\r\n",
+      "drwxr-xr-x 20 root root   24 Sep 14 00:21 ..\r\n",
+      "-rw-r--r--  1 root root 1.5G Sep 13 12:45 v5r3-L6-D2560-E0_1-mem-ctx-512.pth\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Download the model directly (stopgap until the HF sync issues are resolved)\n",
+    "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n",
+    "    wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/{DIR_NAME}/{FILENAME_PREFIX}-mem-ctx-512.pth\"\n",
+    "\n",
+    "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n",
+    "    ls -alh ."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "44993c1b",
+   "metadata": {
+    "papermill": {
+     "duration": 0.040583,
+     "end_time": "2023-09-14T00:23:39.603420",
+     "exception": false,
+     "start_time": "2023-09-14T00:23:39.562837",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "## Tune 3 : Ramping up the ctx size (8192), memory training\n",
+    "\n",
+    "- Tune 3: Large ctx size (8192), Scaling up!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "3d8f956f",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-09-14T00:23:39.641463Z",
+     "iopub.status.busy": "2023-09-14T00:23:39.641175Z",
+     "iopub.status.idle": "2023-09-14T00:24:02.625663Z",
+     "shell.execute_reply": "2023-09-14T00:24:02.625201Z"
+    },
+    "papermill": {
+     "duration": 23.023454,
+     "end_time": "2023-09-14T00:24:02.645057",
+     "exception": false,
+     "start_time": "2023-09-14T00:23:39.621603",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "## Generating word reptition dataset ##\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 10 max words, 100 samples - at ../dataset/gen-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 30 max words, 100 samples - at ../dataset/gen-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 20 max words, 100 samples - at ../dataset/gen-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5 max words, 100 samples - at ../dataset/gen-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 15 max words, 100 samples - at ../dataset/gen-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 35 max words, 100 samples - at ../dataset/gen-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 25 max words, 100 samples - at ../dataset/gen-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 70 max words, 100 samples - at ../dataset/gen-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 80 max words, 100 samples - at ../dataset/gen-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 90 max words, 100 samples - at ../dataset/gen-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 55 max words, 100 samples - at ../dataset/gen-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 60 max words, 100 samples - at ../dataset/gen-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 45 max words, 100 samples - at ../dataset/gen-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 65 max words, 100 samples - at ../dataset/gen-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 100 max words, 100 samples - at ../dataset/gen-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 75 max words, 100 samples - at ../dataset/gen-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 50 max words, 100 samples - at ../dataset/gen-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 85 max words, 100 samples - at ../dataset/gen-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 95 max words, 100 samples - at ../dataset/gen-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 357 samples (10 token repeat) - 75 max words - at ../dataset/shuffle-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 328 samples (10 token repeat) - 80 max words - at ../dataset/shuffle-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 277 samples (10 token repeat) - 95 max words - at ../dataset/shuffle-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 383 samples (10 token repeat) - 70 max words - at ../dataset/shuffle-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 300 samples (10 token repeat) - 90 max words - at ../dataset/shuffle-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 657 samples (10 token repeat) - 40 max words - at ../dataset/shuffle-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 746 samples (10 token repeat) - 35 max words - at ../dataset/shuffle-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 479 samples (10 token repeat) - 55 max words - at ../dataset/shuffle-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 438 samples (10 token repeat) - 60 max words - at ../dataset/shuffle-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 1057 samples (10 token repeat) - 25 max words - at ../dataset/shuffle-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 586 samples (10 token repeat) - 45 max words - at ../dataset/shuffle-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 1300 samples (10 token repeat) - 20 max words - at ../dataset/shuffle-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 317 samples (10 token repeat) - 85 max words - at ../dataset/shuffle-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 527 samples (10 token repeat) - 50 max words - at ../dataset/shuffle-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 873 samples (10 token repeat) - 30 max words - at ../dataset/shuffle-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 262 samples (10 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 117 samples (20 token repeat) - 500 max words - at ../dataset/shuffle-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 180 samples (20 token repeat) - 300 max words - at ../dataset/shuffle-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 410 samples (10 token repeat) - 65 max words - at ../dataset/shuffle-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 79 samples (20 token repeat) - 800 max words - at ../dataset/shuffle-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6600 max words - at ../dataset/shuffle-word-6600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1100 max words - at ../dataset/shuffle-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 63 samples (20 token repeat) - 900 max words - at ../dataset/shuffle-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3000 max words - at ../dataset/shuffle-word-3000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 40 max words, 100 samples - at ../dataset/gen-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3600 max words - at ../dataset/shuffle-word-3600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 524 samples (20 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3300 max words - at ../dataset/shuffle-word-3300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 1765 samples (10 token repeat) - 15 max words - at ../dataset/shuffle-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 2594 samples (10 token repeat) - 10 max words - at ../dataset/shuffle-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7400 max words - at ../dataset/shuffle-word-7400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 270 samples (20 token repeat) - 200 max words - at ../dataset/shuffle-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1500 max words - at ../dataset/shuffle-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 80 samples (20 token repeat) - 700 max words - at ../dataset/shuffle-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3400 max words - at ../dataset/shuffle-word-3400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1200 max words - at ../dataset/shuffle-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1600 max words - at ../dataset/shuffle-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 99 samples (20 token repeat) - 600 max words - at ../dataset/shuffle-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2300 max words - at ../dataset/shuffle-word-2300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3700 max words - at ../dataset/shuffle-word-3700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2200 max words - at ../dataset/shuffle-word-2200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1000 max words - at ../dataset/shuffle-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 139 samples (20 token repeat) - 400 max words - at ../dataset/shuffle-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2800 max words - at ../dataset/shuffle-word-2800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7800 max words - at ../dataset/shuffle-word-7800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3100 max words - at ../dataset/shuffle-word-3100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1900 max words - at ../dataset/shuffle-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3200 max words - at ../dataset/shuffle-word-3200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2000 max words - at ../dataset/shuffle-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1800 max words - at ../dataset/shuffle-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6000 max words - at ../dataset/shuffle-word-6000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6700 max words - at ../dataset/shuffle-word-6700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2900 max words - at ../dataset/shuffle-word-2900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5400 max words - at ../dataset/shuffle-word-5400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5900 max words - at ../dataset/shuffle-word-5900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2100 max words - at ../dataset/shuffle-word-2100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4100 max words - at ../dataset/shuffle-word-4100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5700 max words - at ../dataset/shuffle-word-5700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6100 max words - at ../dataset/shuffle-word-6100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3500 max words - at ../dataset/shuffle-word-3500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5600 max words - at ../dataset/shuffle-word-5600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 27 samples (20 token repeat) - 2600 max words - at ../dataset/shuffle-word-2600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 55 samples (20 token repeat) - 1300 max words - at ../dataset/shuffle-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6300 max words - at ../dataset/shuffle-word-6300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4800 max words - at ../dataset/shuffle-word-4800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1700 max words - at ../dataset/shuffle-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4400 max words - at ../dataset/shuffle-word-4400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4300 max words - at ../dataset/shuffle-word-4300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6800 max words - at ../dataset/shuffle-word-6800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5000 max words - at ../dataset/shuffle-word-5000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5500 max words - at ../dataset/shuffle-word-5500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7100 max words - at ../dataset/shuffle-word-7100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5100 max words - at ../dataset/shuffle-word-5100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4000 max words - at ../dataset/shuffle-word-4000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3900 max words - at ../dataset/shuffle-word-3900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7600 max words - at ../dataset/shuffle-word-7600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6200 max words - at ../dataset/shuffle-word-6200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5200 max words - at ../dataset/shuffle-word-5200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 25 samples (20 token repeat) - 2700 max words - at ../dataset/shuffle-word-2700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4600 max words - at ../dataset/shuffle-word-4600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4900 max words - at ../dataset/shuffle-word-4900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7700 max words - at ../dataset/shuffle-word-7700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6400 max words - at ../dataset/shuffle-word-6400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7900 max words - at ../dataset/shuffle-word-7900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5800 max words - at ../dataset/shuffle-word-5800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7500 max words - at ../dataset/shuffle-word-7500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7200 max words - at ../dataset/shuffle-word-7200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1400 max words - at ../dataset/shuffle-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 39 samples (20 token repeat) - 2500 max words - at ../dataset/shuffle-word-2500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3800 max words - at ../dataset/shuffle-word-3800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2400 max words - at ../dataset/shuffle-word-2400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 8000 max words - at ../dataset/shuffle-word-8000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7000 max words - at ../dataset/shuffle-word-7000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 5535 samples (10 token repeat) - 5 max words - at ../dataset/shuffle-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4200 max words - at ../dataset/shuffle-word-4200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4500 max words - at ../dataset/shuffle-word-4500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4700 max words - at ../dataset/shuffle-word-4700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6500 max words - at ../dataset/shuffle-word-6500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7300 max words - at ../dataset/shuffle-word-7300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5300 max words - at ../dataset/shuffle-word-5300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6900 max words - at ../dataset/shuffle-word-6900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 100 max words, 2000 samples - at ../dataset/gen-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 200 max words, 2000 samples - at ../dataset/gen-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 300 max words, 2000 samples - at ../dataset/gen-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 500 max words, 2000 samples - at ../dataset/gen-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 400 max words, 2000 samples - at ../dataset/gen-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 700 max words, 2000 samples - at ../dataset/gen-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 600 max words, 2000 samples - at ../dataset/gen-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1000 max words, 2000 samples - at ../dataset/gen-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1800 max words, 2000 samples - at ../dataset/gen-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 800 max words, 2000 samples - at ../dataset/gen-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 900 max words, 2000 samples - at ../dataset/gen-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1300 max words, 2000 samples - at ../dataset/gen-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1200 max words, 2000 samples - at ../dataset/gen-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1100 max words, 2000 samples - at ../dataset/gen-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1500 max words, 2000 samples - at ../dataset/gen-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1400 max words, 2000 samples - at ../dataset/gen-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2000 max words, 2000 samples - at ../dataset/gen-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1700 max words, 2000 samples - at ../dataset/gen-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1600 max words, 2000 samples - at ../dataset/gen-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2200 max words, 2000 samples - at ../dataset/gen-word-2200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2100 max words, 2000 samples - at ../dataset/gen-word-2100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 1900 max words, 2000 samples - at ../dataset/gen-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3000 max words, 2000 samples - at ../dataset/gen-word-3000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2400 max words, 2000 samples - at ../dataset/gen-word-2400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2500 max words, 2000 samples - at ../dataset/gen-word-2500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2700 max words, 2000 samples - at ../dataset/gen-word-2700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2300 max words, 2000 samples - at ../dataset/gen-word-2300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2800 max words, 2000 samples - at ../dataset/gen-word-2800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2600 max words, 2000 samples - at ../dataset/gen-word-2600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 2900 max words, 2000 samples - at ../dataset/gen-word-2900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4100 max words, 2000 samples - at ../dataset/gen-word-4100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4000 max words, 2000 samples - at ../dataset/gen-word-4000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3600 max words, 2000 samples - at ../dataset/gen-word-3600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3100 max words, 2000 samples - at ../dataset/gen-word-3100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3400 max words, 2000 samples - at ../dataset/gen-word-3400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3800 max words, 2000 samples - at ../dataset/gen-word-3800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3500 max words, 2000 samples - at ../dataset/gen-word-3500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3300 max words, 2000 samples - at ../dataset/gen-word-3300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3200 max words, 2000 samples - at ../dataset/gen-word-3200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3700 max words, 2000 samples - at ../dataset/gen-word-3700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 3900 max words, 2000 samples - at ../dataset/gen-word-3900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4200 max words, 2000 samples - at ../dataset/gen-word-4200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4600 max words, 2000 samples - at ../dataset/gen-word-4600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5600 max words, 2000 samples - at ../dataset/gen-word-5600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6100 max words, 2000 samples - at ../dataset/gen-word-6100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4300 max words, 2000 samples - at ../dataset/gen-word-4300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6200 max words, 2000 samples - at ../dataset/gen-word-6200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4500 max words, 2000 samples - at ../dataset/gen-word-4500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5200 max words, 2000 samples - at ../dataset/gen-word-5200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4400 max words, 2000 samples - at ../dataset/gen-word-4400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4800 max words, 2000 samples - at ../dataset/gen-word-4800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5500 max words, 2000 samples - at ../dataset/gen-word-5500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5100 max words, 2000 samples - at ../dataset/gen-word-5100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4700 max words, 2000 samples - at ../dataset/gen-word-4700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6000 max words, 2000 samples - at ../dataset/gen-word-6000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5000 max words, 2000 samples - at ../dataset/gen-word-5000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 4900 max words, 2000 samples - at ../dataset/gen-word-4900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5400 max words, 2000 samples - at ../dataset/gen-word-5400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5800 max words, 2000 samples - at ../dataset/gen-word-5800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5300 max words, 2000 samples - at ../dataset/gen-word-5300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5700 max words, 2000 samples - at ../dataset/gen-word-5700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7400 max words, 2000 samples - at ../dataset/gen-word-7400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6900 max words, 2000 samples - at ../dataset/gen-word-6900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6300 max words, 2000 samples - at ../dataset/gen-word-6300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 5900 max words, 2000 samples - at ../dataset/gen-word-5900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7900 max words, 2000 samples - at ../dataset/gen-word-7900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6600 max words, 2000 samples - at ../dataset/gen-word-6600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6400 max words, 2000 samples - at ../dataset/gen-word-6400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7200 max words, 2000 samples - at ../dataset/gen-word-7200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6800 max words, 2000 samples - at ../dataset/gen-word-6800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6700 max words, 2000 samples - at ../dataset/gen-word-6700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 6500 max words, 2000 samples - at ../dataset/gen-word-6500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7800 max words, 2000 samples - at ../dataset/gen-word-7800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7100 max words, 2000 samples - at ../dataset/gen-word-7100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7000 max words, 2000 samples - at ../dataset/gen-word-7000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 8000 max words, 2000 samples - at ../dataset/gen-word-8000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7600 max words, 2000 samples - at ../dataset/gen-word-7600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7500 max words, 2000 samples - at ../dataset/gen-word-7500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7300 max words, 2000 samples - at ../dataset/gen-word-7300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated JSONL file with - 7700 max words, 2000 samples - at ../dataset/gen-word-7700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "## Done ##\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "total 2.2G\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  20K Sep 14 00:23 gen-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 2.1M Sep 14 00:23 gen-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  20M Sep 14 00:23 gen-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  22M Sep 14 00:23 gen-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  23M Sep 14 00:23 gen-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  25M Sep 14 00:23 gen-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  27M Sep 14 00:23 gen-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  25K Sep 14 00:23 gen-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  29M Sep 14 00:23 gen-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  31M Sep 14 00:23 gen-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  33M Sep 14 00:23 gen-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  35M Sep 14 00:23 gen-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  37M Sep 14 00:23 gen-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  30K Sep 14 00:23 gen-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 4.0M Sep 14 00:23 gen-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  39M Sep 14 00:23 gen-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  41M Sep 14 00:23 gen-word-2100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  42M Sep 14 00:23 gen-word-2200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  44M Sep 14 00:23 gen-word-2300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  46M Sep 14 00:23 gen-word-2400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  34K Sep 14 00:23 gen-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  48M Sep 14 00:23 gen-word-2500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  50M Sep 14 00:23 gen-word-2600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  52M Sep 14 00:23 gen-word-2700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  54M Sep 14 00:23 gen-word-2800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  56M Sep 14 00:23 gen-word-2900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  40K Sep 14 00:23 gen-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 5.9M Sep 14 00:23 gen-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  58M Sep 14 00:23 gen-word-3000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  60M Sep 14 00:23 gen-word-3100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  61M Sep 14 00:23 gen-word-3200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  63M Sep 14 00:23 gen-word-3300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  65M Sep 14 00:23 gen-word-3400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  44K Sep 14 00:23 gen-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  67M Sep 14 00:23 gen-word-3500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  69M Sep 14 00:23 gen-word-3600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  71M Sep 14 00:23 gen-word-3700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  73M Sep 14 00:23 gen-word-3800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  75M Sep 14 00:23 gen-word-3900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  49K Sep 14 00:23 gen-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 7.9M Sep 14 00:23 gen-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  77M Sep 14 00:23 gen-word-4000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  79M Sep 14 00:23 gen-word-4100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  80M Sep 14 00:23 gen-word-4200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  82M Sep 14 00:23 gen-word-4300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  84M Sep 14 00:23 gen-word-4400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  54K Sep 14 00:23 gen-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  86M Sep 14 00:23 gen-word-4500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  88M Sep 14 00:23 gen-word-4600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  90M Sep 14 00:23 gen-word-4700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  92M Sep 14 00:23 gen-word-4800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  94M Sep 14 00:23 gen-word-4900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  15K Sep 14 00:23 gen-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  59K Sep 14 00:23 gen-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 9.7M Sep 14 00:23 gen-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  96M Sep 14 00:23 gen-word-5000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  98M Sep 14 00:23 gen-word-5100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  99M Sep 14 00:23 gen-word-5200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 101M Sep 14 00:24 gen-word-5300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 103M Sep 14 00:23 gen-word-5400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  64K Sep 14 00:23 gen-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 105M Sep 14 00:23 gen-word-5500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 107M Sep 14 00:23 gen-word-5600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 109M Sep 14 00:24 gen-word-5700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 111M Sep 14 00:24 gen-word-5800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 113M Sep 14 00:24 gen-word-5900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  68K Sep 14 00:23 gen-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  12M Sep 14 00:23 gen-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 115M Sep 14 00:23 gen-word-6000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 117M Sep 14 00:23 gen-word-6100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 118M Sep 14 00:23 gen-word-6200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 120M Sep 14 00:24 gen-word-6300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 122M Sep 14 00:24 gen-word-6400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  73K Sep 14 00:23 gen-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 124M Sep 14 00:24 gen-word-6500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 126M Sep 14 00:24 gen-word-6600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 128M Sep 14 00:24 gen-word-6700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 130M Sep 14 00:24 gen-word-6800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 132M Sep 14 00:24 gen-word-6900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  77K Sep 14 00:23 gen-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  14M Sep 14 00:23 gen-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 134M Sep 14 00:24 gen-word-7000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 136M Sep 14 00:24 gen-word-7100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 137M Sep 14 00:24 gen-word-7200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 139M Sep 14 00:24 gen-word-7300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 141M Sep 14 00:24 gen-word-7400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  84K Sep 14 00:23 gen-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 143M Sep 14 00:24 gen-word-7500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 145M Sep 14 00:24 gen-word-7600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 147M Sep 14 00:24 gen-word-7700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 149M Sep 14 00:24 gen-word-7800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 151M Sep 14 00:24 gen-word-7900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  89K Sep 14 00:23 gen-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  16M Sep 14 00:23 gen-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 153M Sep 14 00:24 gen-word-8000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  93K Sep 14 00:23 gen-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  98K Sep 14 00:23 gen-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root  18M Sep 14 00:23 gen-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 104K Sep 14 00:23 gen-word-95-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 500K Sep 14 00:23 shuffle-word-10-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 567K Sep 14 00:23 shuffle-word-100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 522K Sep 14 00:23 shuffle-word-1000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 520K Sep 14 00:23 shuffle-word-1100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 523K Sep 14 00:23 shuffle-word-1200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 522K Sep 14 00:23 shuffle-word-1300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 518K Sep 14 00:23 shuffle-word-1400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 427K Sep 14 00:23 shuffle-word-15-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 522K Sep 14 00:23 shuffle-word-1500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 522K Sep 14 00:23 shuffle-word-1600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 517K Sep 14 00:23 shuffle-word-1700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 519K Sep 14 00:23 shuffle-word-1800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 520K Sep 14 00:23 shuffle-word-1900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 381K Sep 14 00:23 shuffle-word-20-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 543K Sep 14 00:23 shuffle-word-200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 518K Sep 14 00:23 shuffle-word-2000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 521K Sep 14 00:23 shuffle-word-2100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 518K Sep 14 00:23 shuffle-word-2200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 522K Sep 14 00:23 shuffle-word-2300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 520K Sep 14 00:23 shuffle-word-2400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 358K Sep 14 00:23 shuffle-word-25-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 520K Sep 14 00:23 shuffle-word-2500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 513K Sep 14 00:23 shuffle-word-2600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 512K Sep 14 00:23 shuffle-word-2700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-2800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-2900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 338K Sep 14 00:23 shuffle-word-30-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 530K Sep 14 00:23 shuffle-word-300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-3000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-3100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-3200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-3300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-3400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 320K Sep 14 00:23 shuffle-word-35-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-3500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-3600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-3700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-3800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-3900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 324K Sep 14 00:23 shuffle-word-40-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 530K Sep 14 00:23 shuffle-word-400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-4300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 313K Sep 14 00:23 shuffle-word-45-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 791K Sep 14 00:23 shuffle-word-5-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 299K Sep 14 00:23 shuffle-word-50-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 524K Sep 14 00:23 shuffle-word-500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-5000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-5100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-5200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-5300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-5400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 302K Sep 14 00:23 shuffle-word-55-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-5500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-5600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-5700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-5800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-5900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 298K Sep 14 00:23 shuffle-word-60-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 525K Sep 14 00:23 shuffle-word-600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-6000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-6100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-6200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-6300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-6400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 299K Sep 14 00:23 shuffle-word-65-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-6500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-6600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-6700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-6800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-6900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 297K Sep 14 00:23 shuffle-word-70-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 526K Sep 14 00:23 shuffle-word-700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-7000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-7100-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-7200-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-7300-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-7400-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 293K Sep 14 00:23 shuffle-word-75-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-7500-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-7600-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-7700-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-7800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-7900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 289K Sep 14 00:23 shuffle-word-80-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 525K Sep 14 00:23 shuffle-word-800-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-8000-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 288K Sep 14 00:23 shuffle-word-85-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 284K Sep 14 00:23 shuffle-word-90-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 522K Sep 14 00:23 shuffle-word-900-count.jsonl\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-rw-r--r-- 1 root root 284K Sep 14 00:23 shuffle-word-95-count.jsonl\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%script bash\n",
+    "\n",
+    "########################################\n",
+    "# Generate the required jsonl dataset\n",
+    "########################################\n",
+    "\n",
+    "# Go to config dir\n",
+    "cd \"../\"\n",
+    "\n",
+    "# Reset the dataset dir\n",
+    "mkdir -p ../dataset\n",
+    "rm -rf ../dataset/*.jsonl\n",
+    "\n",
+    "# Generate the various datasets\n",
+    "echo \"## Generating word reptition dataset ##\"\n",
+    "\n",
+    "#\n",
+    "# We reduce the training set for < 50 words - and shift the focus upwards\n",
+    "# (aka 50-100 token * 2 : ~100 - 250 token ctx len)\n",
+    "# NOTE: stops at 95 - the 100 word files are written by the next loop (two parallel writers corrupt the jsonl)\n",
+    "for i in {5..95..5} \n",
+    "do\n",
+    "    python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 100 & \n",
+    "    python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 10 & \n",
+    "done\n",
+    "\n",
+    "#\n",
+    "# Ramping up the 100 - 8000 words dataset (in steps of 100)\n",
+    "# \n",
+    "for i in {100..8000..100} \n",
+    "do\n",
+    "    python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 2000 & \n",
+    "    python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 20 & \n",
+    "done\n",
+    "\n",
+    "wait\n",
+    "echo \"## Done ##\"\n",
+    "\n",
+    "ls -lh ../dataset/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "701b6753",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-09-14T00:24:02.712544Z",
+     "iopub.status.busy": "2023-09-14T00:24:02.712008Z",
+     "iopub.status.idle": "2023-09-14T00:24:35.560189Z",
+     "shell.execute_reply": "2023-09-14T00:24:35.559394Z"
+    },
+    "papermill": {
+     "duration": 32.883629,
+     "end_time": "2023-09-14T00:24:35.562107",
+     "exception": false,
+     "start_time": "2023-09-14T00:24:02.678478",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2023-09-14 00:24:05,633] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L6-D2560-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L6-D2560-E0_1-mem-ctx-8k/', '--model.lr_init=4e-4', '--model.lr_final=2e-4', '--data.max_token_size=8192', '--data.sort_by_length=True', '--model.ctx_len=4096', '--model.bptt_learning_range=2', '--model.load_model=../model/v5r3-L6-D2560-E0_1-mem-ctx-512.pth'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L6-D2560-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L6-D2560-E0_1-mem-ctx-8k/', '--model.lr_init=4e-4', '--model.lr_final=2e-4', '--data.max_token_size=8192', '--data.sort_by_length=True', '--model.ctx_len=4096', '--model.bptt_learning_range=2', '--model.load_model=../model/v5r3-L6-D2560-E0_1-mem-ctx-512.pth'].\r\n",
+      "  rank_zero_warn(\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 4194784656\r\n",
+      "  rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n",
+      "Global seed set to 4194784656\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.10\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20230914_002408-wrr91tv7\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mv5r3-L6-D2560-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/wrr91tv7\u001b[0m\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.10/dist-packages/lightning/fabric/connector.py:554: UserWarning: bf16 is supported for historical reasons but its usage is discouraged. Please set your precision to bf16-mixed instead!\r\n",
+      "  rank_zero_warn(\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "GPU available: True (cuda), used: True\r\n",
+      "TPU available: False, using: 0 TPU cores\r\n",
+      "IPU available: False, using: 0 IPUs\r\n",
+      "HPU available: False, using: 0 HPUs\r\n",
+      "\r\n",
+      "\r\n",
+      "[RWKV.Trainer] Applying 'target_batch_size' with the following:\r\n",
+      "   - target_batch_size:       256\r\n",
+      "   - num_nodes:               1\r\n",
+      "   - num_devices:             1\r\n",
+      "   - accumulate_grad_batches: 256\r\n",
+      "   - effective_batch_size:    256\r\n",
+      "\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Resolving data files:   0%|                             | 0/198 [00:00<?, ?it/s]\r",
+      "Resolving data files: 100%|███████████████| 198/198 [00:00<00:00, 148883.51it/s]\r\n",
+      "\r",
+      "Downloading data files:   0%|                             | 0/1 [00:00<?, ?it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Downloading data files: 100%|████████████████████| 1/1 [00:00<00:00, 287.97it/s]\r\n",
+      "\r",
+      "Extracting data files:   0%|                              | 0/1 [00:00<?, ?it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Extracting data files: 100%|██████████████████████| 1/1 [00:00<00:00, 12.62it/s]\r\n",
+      "\r",
+      "Generating train split: 0 examples [00:00, ? examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 2625 examples [00:00, 3049.60 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 17014 examples [00:00, 22989.43 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 33146 examples [00:01, 46148.63 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 44997 examples [00:01, 51154.78 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 55124 examples [00:01, 48517.02 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 64087 examples [00:01, 35495.81 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 70515 examples [00:02, 36667.80 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 75772 examples [00:02, 30708.14 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 80333 examples [00:02, 30088.94 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 84141 examples [00:02, 27930.07 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 87548 examples [00:02, 27984.68 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 90787 examples [00:02, 26886.31 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 93778 examples [00:03, 27040.43 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 96973 examples [00:03, 27248.89 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 101191 examples [00:03, 30724.11 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 106104 examples [00:03, 34273.50 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 110290 examples [00:03, 34016.09 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 113802 examples [00:03, 32420.58 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 117160 examples [00:03, 22500.32 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 120238 examples [00:04, 21384.63 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 122791 examples [00:04, 21213.85 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 125835 examples [00:04, 23194.87 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 129263 examples [00:04, 24367.39 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Failed to read file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/dataset/shuffle-word-100-count.jsonl' with error <class 'pyarrow.lib.ArrowInvalid'>: JSON parse error: Missing a comma or '}' after an object member. in row 233\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 136606 examples [00:04, 35027.49 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 140455 examples [00:04, 20618.59 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 143531 examples [00:05, 21445.75 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 146681 examples [00:05, 22082.33 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 149694 examples [00:05, 22730.77 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 152975 examples [00:05, 22489.21 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 155530 examples [00:05, 20086.23 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 158092 examples [00:05, 15943.50 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 160242 examples [00:06, 15481.26 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 162322 examples [00:06, 14098.18 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 164028 examples [00:06, 13989.96 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 165725 examples [00:06, 13009.40 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 167287 examples [00:06, 12674.13 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 169262 examples [00:06, 13937.10 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 170803 examples [00:06, 14075.62 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 172262 examples [00:06, 13051.73 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 173727 examples [00:07, 12319.32 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 175213 examples [00:07, 12084.54 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 176512 examples [00:07, 11651.88 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 177716 examples [00:07, 11150.69 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 178902 examples [00:07, 11209.52 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 180206 examples [00:07, 11234.12 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 181684 examples [00:07, 11864.15 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 182992 examples [00:07, 11920.17 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Generating train split: 183622 examples [00:08, 22556.31 examples/s]\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "multiprocess.pool.RemoteTraceback: \r\n",
+      "\"\"\"\r\n",
+      "Traceback (most recent call last):\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/packaged_modules/json/json.py\", line 144, in _generate_tables\r\n",
+      "    dataset = json.load(f)\r\n",
+      "  File \"/usr/lib/python3.10/json/__init__.py\", line 293, in load\r\n",
+      "    return loads(fp.read(),\r\n",
+      "  File \"/usr/lib/python3.10/json/__init__.py\", line 346, in loads\r\n",
+      "    return _default_decoder.decode(s)\r\n",
+      "  File \"/usr/lib/python3.10/json/decoder.py\", line 340, in decode\r\n",
+      "    raise JSONDecodeError(\"Extra data\", s, end)\r\n",
+      "json.decoder.JSONDecodeError: Extra data: line 2 column 1 (char 1121)\r\n",
+      "\r\n",
+      "During handling of the above exception, another exception occurred:\r\n",
+      "\r\n",
+      "Traceback (most recent call last):\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 1925, in _prepare_split_single\r\n",
+      "    for _, table in generator:\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/packaged_modules/json/json.py\", line 147, in _generate_tables\r\n",
+      "    raise e\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/packaged_modules/json/json.py\", line 121, in _generate_tables\r\n",
+      "    pa_table = paj.read_json(\r\n",
+      "  File \"pyarrow/_json.pyx\", line 258, in pyarrow._json.read_json\r\n",
+      "  File \"pyarrow/error.pxi\", line 144, in pyarrow.lib.pyarrow_internal_check_status\r\n",
+      "  File \"pyarrow/error.pxi\", line 100, in pyarrow.lib.check_status\r\n",
+      "pyarrow.lib.ArrowInvalid: JSON parse error: Missing a comma or '}' after an object member. in row 233\r\n",
+      "\r\n",
+      "The above exception was the direct cause of the following exception:\r\n",
+      "\r\n",
+      "Traceback (most recent call last):\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/multiprocess/pool.py\", line 125, in worker\r\n",
+      "    result = (True, func(*args, **kwds))\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py\", line 1347, in _write_generator_to_queue\r\n",
+      "    for i, result in enumerate(func(**kwargs)):\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 1958, in _prepare_split_single\r\n",
+      "    raise DatasetGenerationError(\"An error occurred while generating the dataset\") from e\r\n",
+      "datasets.builder.DatasetGenerationError: An error occurred while generating the dataset\r\n",
+      "\"\"\"\r\n",
+      "\r\n",
+      "The above exception was the direct cause of the following exception:\r\n",
+      "\r\n",
+      "Traceback (most recent call last):\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 258, in <module>\r\n",
+      "    cli_main()\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 233, in cli_main\r\n",
+      "    LightningCLI(\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 353, in __init__\r\n",
+      "    self._run_subcommand(self.subcommand)\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 642, in _run_subcommand\r\n",
+      "    fn(**fn_kwargs)\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 529, in fit\r\n",
+      "    call._call_and_handle_interrupt(\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 41, in _call_and_handle_interrupt\r\n",
+      "    return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/strategies/launchers/subprocess_script.py\", line 91, in launch\r\n",
+      "    return function(*args, **kwargs)\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 568, in _fit_impl\r\n",
+      "    self._run(model, ckpt_path=ckpt_path)\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 925, in _run\r\n",
+      "    self._data_connector.prepare_data()\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py\", line 94, in prepare_data\r\n",
+      "    call._call_lightning_datamodule_hook(trainer, \"prepare_data\")\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 164, in _call_lightning_datamodule_hook\r\n",
+      "    return fn(*args, **kwargs)\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 549, in prepare_data\r\n",
+      "    prepare_data_static(**self._init_locals)\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 101, in prepare_data_static\r\n",
+      "    src_dataset = load_dataset(**load_dataset_params)\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 2136, in load_dataset\r\n",
+      "    builder_instance.download_and_prepare(\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 954, in download_and_prepare\r\n",
+      "    self._download_and_prepare(\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 1049, in _download_and_prepare\r\n",
+      "    self._prepare_split(split_generator, **prepare_split_kwargs)\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 1842, in _prepare_split\r\n",
+      "    for job_id, done, content in iflatmap_unordered(\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py\", line 1387, in iflatmap_unordered\r\n",
+      "    [async_result.get(timeout=0.05) for async_result in async_results]\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py\", line 1387, in <listcomp>\r\n",
+      "    [async_result.get(timeout=0.05) for async_result in async_results]\r\n",
+      "  File \"/usr/local/lib/python3.10/dist-packages/multiprocess/pool.py\", line 774, in get\r\n",
+      "    raise self._value\r\n",
+      "datasets.builder.DatasetGenerationError: An error occurred while generating the dataset\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5r3-L6-D2560-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/wrr91tv7\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjkzMjg5ODA3/version_details/v53\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230914_002408-wrr91tv7/logs\u001b[0m\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Start the finetune model training\n",
+    "!cd \"{TRAINER_DIR}\" && \\\n",
+    "    export WANDB_MODE=\"{WANDB_MODE}\" && \\\n",
+    "    python3 lightning_trainer.py fit \\\n",
+    "        -c \"{CONFIG_DIR}/config-mem-template.yaml\" \\\n",
+    "        --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Tune ctx-8k (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n",
+    "        --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n",
+    "        --trainer.devices=\"{GPU_DEVICES}\"  \\\n",
+    "        --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-mem-ctx-8k/\" \\\n",
+    "        --model.lr_init=4e-4 \\\n",
+    "        --model.lr_final=2e-4 \\\n",
+    "        --data.max_token_size=8192 \\\n",
+    "        --data.sort_by_length=True \\\n",
+    "        --model.ctx_len=4096 \\\n",
+    "        --model.bptt_learning_range=2 \\\n",
+    "        --model.load_model=\"../model/{FILENAME_PREFIX}-mem-ctx-512.pth\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "500c7607",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-09-14T00:24:35.637916Z",
+     "iopub.status.busy": "2023-09-14T00:24:35.637654Z",
+     "iopub.status.idle": "2023-09-14T00:24:38.174650Z",
+     "shell.execute_reply": "2023-09-14T00:24:38.173904Z"
+    },
+    "papermill": {
+     "duration": 2.574032,
+     "end_time": "2023-09-14T00:24:38.176339",
+     "exception": false,
+     "start_time": "2023-09-14T00:24:35.602307",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2023-09-14 00:24:37,304] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Traceback (most recent call last):\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in <module>\r\n",
+      "    convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n",
+      "    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n",
+      "  File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n",
+      "    raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n",
+      "ValueError: Unable to find 'latest' file at ../checkpoint/v5r3-L6-D2560-E0_1-mem-ctx-8k/last.ckpt/latest\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ls: cannot access '../model/v5r3-L6-D2560-E0_1-mem-ctx-8k.pth': No such file or directory\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Lets export the model from the checkpoint\n",
+    "!cd \"{TRAINER_DIR}\" && \\\n",
+    "    python3 export_checkpoint.py \\\n",
+    "        \"../checkpoint/{FILENAME_PREFIX}-mem-ctx-8k/last.ckpt\" \\\n",
+    "        \"../model/{FILENAME_PREFIX}-mem-ctx-8k.pth\" \"bf16\"\n",
+    "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-mem-ctx-8k.pth\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "a169a91a",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-09-14T00:24:38.251360Z",
+     "iopub.status.busy": "2023-09-14T00:24:38.251096Z",
+     "iopub.status.idle": "2023-09-14T00:24:38.489940Z",
+     "shell.execute_reply": "2023-09-14T00:24:38.489202Z"
+    },
+    "papermill": {
+     "duration": 0.276215,
+     "end_time": "2023-09-14T00:24:38.491553",
+     "exception": false,
+     "start_time": "2023-09-14T00:24:38.215338",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/../memory_script/eval_v5_memory_guided.py': [Errno 2] No such file or directory\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Lets do a quick memory test\n",
+    "!python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-8k.pth\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "57ad36b1",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-09-14T00:24:38.567093Z",
+     "iopub.status.busy": "2023-09-14T00:24:38.566558Z",
+     "iopub.status.idle": "2023-09-14T00:24:38.802491Z",
+     "shell.execute_reply": "2023-09-14T00:24:38.801579Z"
+    },
+    "papermill": {
+     "duration": 0.273683,
+     "end_time": "2023-09-14T00:24:38.804465",
+     "exception": false,
+     "start_time": "2023-09-14T00:24:38.530782",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/../memory_script/eval_v5_memory_guided.py': [Errno 2] No such file or directory\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "!python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-8k.pth\" \"none\" 1000 4000"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "python3 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  },
+  "papermill": {
+   "default_parameters": {},
+   "duration": 171.070576,
+   "end_time": "2023-09-14T00:24:38.962638",
+   "environment_variables": {},
+   "exception": null,
+   "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb",
+   "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb",
+   "parameters": {},
+   "start_time": "2023-09-14T00:21:47.892062",
+   "version": "2.4.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
\ No newline at end of file
diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage4.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage4.ipynb
deleted file mode 100644
index d2df3a3f63985ebd40d91463248c0db8fe7ba56b..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage4.ipynb
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:17f586e9a94c3fbf463e84c518ca7712b6fd539d3c8dda8e4893115a1298c8d4
-size 37212168
diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage5.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage5.ipynb
deleted file mode 100644
index 4c2298987669d56cd07135b0ec4f9da50c75303c..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage5.ipynb
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8b5c4dec751996e61882229f30bba0e005ce01e44319a1b75011aacad7575fc0
-size 30004883
diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-1k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-1k.pth
deleted file mode 100644
index b525c72fd31827ffe8aadfecb883cf5b3cceb8f6..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-1k.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0d91f2c3f5b96e9d249342bdead58f58d3b1f5ab7c92401a50ab4e5170ae2636
-size 1537632373
diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-2k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-2k.pth
deleted file mode 100644
index 4aade76d3bbd951a784adce9596a631a5c1640e8..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-2k.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a00e8cbc2222bb853dc5f83fe3d6f4c43f4b970cc554be37fb937d476e3eaf88
-size 1537632373
diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-4k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-4k.pth
deleted file mode 100644
index 4b8c0bd7e1c67c46e2676c3c9141d7c46019114d..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-4k.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4cc22ca95e9a2054534ef8fcce63cf2d0ce65916b39318ce650debd41adee876
-size 1537632373
diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-8k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-8k.pth
deleted file mode 100644
index 242c0b4bb345f3db30feafae94d0928d82498501..0000000000000000000000000000000000000000
--- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-8k.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:28d84ce479bb5ca4e3a226f9eb03266c344a398bfff8f420af4ae5598f23fe86
-size 1537632373
diff --git a/manual-uploads/3B-code/3B-CM-v5r4-L48-D2048-E0_1-enwiki-4k.pth b/manual-uploads/3B-code/3B-CM-v5r4-L48-D2048-E0_1-enwiki-4k.pth
deleted file mode 100644
index 1bf1e3dc51ade1b71678c5f59d05067dfad6a652..0000000000000000000000000000000000000000
--- a/manual-uploads/3B-code/3B-CM-v5r4-L48-D2048-E0_1-enwiki-4k.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:327bb880f90f80ba9d119f0ca43fb108994e02c5935ec0384a619b092fc2f341
-size 5774098255
diff --git a/manual-uploads/3B-code/_anchor b/manual-uploads/3B-code/_anchor
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-1B5-world.pth b/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-1B5-world.pth
deleted file mode 100644
index 85b2cc712939163ad6ffc54bc460941d4b9d38cd..0000000000000000000000000000000000000000
--- a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-1B5-world.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1844acad3a36721d4427efa928dd7bbe84bff6ec98ceb310db33987106672a8d
-size 3155687506
diff --git a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-3B-world.pth b/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-3B-world.pth
deleted file mode 100644
index fbcd575d03307f44cf24f2855f831273aa31a819..0000000000000000000000000000000000000000
--- a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-3B-world.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3a8289e17931e0d3ed2cc213eaa66e1ce12f005c69030a9afb38b33987f8877b
-size 6126236920
diff --git a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-7B-world.pth b/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-7B-world.pth
deleted file mode 100644
index ee19bc64b756b3f670b76b0754f8c9bb20960aab..0000000000000000000000000000000000000000
--- a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-7B-world.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3faf38a7820276bc2dc36d27259d7067c56aa228ec5dd72f743dfc9d72ff3988
-size 15036330880
diff --git a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-1B5-world.pth b/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-1B5-world.pth
deleted file mode 100644
index 414398090acb93c53e824c8b21fd9a840dc301e6..0000000000000000000000000000000000000000
--- a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-1B5-world.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0dac80051873f2fc1bb4645d7986330b49976520ddad6574ab4ad4d3dc3bdc15
-size 3155687506
diff --git a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-3B-world.pth b/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-3B-world.pth
deleted file mode 100644
index 8e7c0e5c5233a0b493e06a7d261ab72a14e68d71..0000000000000000000000000000000000000000
--- a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-3B-world.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:11238a58929e3cb5c4cfe2777e555c66f01a09e391361e6cc30143eb5360e1ac
-size 6126236920
diff --git a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-7B-world.pth b/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-7B-world.pth
deleted file mode 100644
index d46afdf695f7e39687ceae0cafb215d175b58936..0000000000000000000000000000000000000000
--- a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-7B-world.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:53aeea26eac9b77fdc82484533e98374cc06b08ab2084d7e7f062325b86a912b
-size 15036330880
diff --git a/manual-uploads/RWKV-v5-memory-test/_anchor.txt b/manual-uploads/RWKV-v5-memory-test/_anchor.txt
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000