diff --git "a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb" "b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb" --- "a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb" +++ "b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb" @@ -1,3 +1,7683 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:46167bb1edba84bd184533d3d424a8c4d9ed5a15d8b93474596453f235462f13 -size 58866814 +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "158fdf2c", + "metadata": { + "papermill": { + "duration": 0.004282, + "end_time": "2023-09-14T00:21:48.785213", + "exception": false, + "start_time": "2023-09-14T00:21:48.780931", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# RWKV v5\n", + "\n", + "Simple memory training for a small model\n", + "\n", + "**Note:** This project assumes you have the rwkv-infctx conda env setup" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "792a73e5", + "metadata": { + "papermill": { + "duration": 0.002651, + "end_time": "2023-09-14T00:21:48.790826", + "exception": false, + "start_time": "2023-09-14T00:21:48.788175", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Basic Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "761b91e0", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-14T00:21:48.796049Z", + "iopub.status.busy": "2023-09-14T00:21:48.795747Z", + "iopub.status.idle": "2023-09-14T00:21:49.675797Z", + "shell.execute_reply": "2023-09-14T00:21:49.674913Z" + }, + "papermill": { + "duration": 0.88445, + "end_time": "2023-09-14T00:21:49.677690", + "exception": false, + "start_time": "2023-09-14T00:21:48.793240", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CITATION.cff RWKV-v4wavenet\t RWKV-v5headsize2x checkpoint\tnotebook\r\n", + "LICENSE RWKV-v5\t\t 
RWKV-v5headsize32 datapath\toutput\r\n", + "README.md RWKV-v5-beta2\t RWKV-v5rstack\t docker\r\n", + "RWKV-v4neo RWKV-v5altwavenet RWKV-v5wavenet model\r\n" + ] + } + ], + "source": [ + "# First lets setup the various directories, and init the model\n", + "!ls ../../../../../\n", + "!mkdir -p ../../../../../model/\n", + "!mkdir -p ../../../../../datapath/\n", + "!mkdir -p ../../../../../checkpoint/" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d8775637", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-14T00:21:49.684909Z", + "iopub.status.busy": "2023-09-14T00:21:49.684672Z", + "iopub.status.idle": "2023-09-14T00:21:51.806309Z", + "shell.execute_reply": "2023-09-14T00:21:51.805544Z" + }, + "papermill": { + "duration": 2.127403, + "end_time": "2023-09-14T00:21:51.808325", + "exception": false, + "start_time": "2023-09-14T00:21:49.680922", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\r\n", + "\u001b[0m" + ] + } + ], + "source": [ + "# Additional dependencies for eval stuff\n", + "!pip3 install -q aiocsv aiofiles" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "bda1d282", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-14T00:21:51.816177Z", + "iopub.status.busy": "2023-09-14T00:21:51.815928Z", + "iopub.status.idle": "2023-09-14T00:21:51.824598Z", + "shell.execute_reply": "2023-09-14T00:21:51.823913Z" + }, + "papermill": { + "duration": 0.014641, + "end_time": "2023-09-14T00:21:51.826277", + "exception": false, + "start_time": "2023-09-14T00:21:51.811636", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DEEPSPEED_STRAT: deepspeed_stage_1\n", + "ENABLE_WANDB: True\n", + "GPU_DEVICES: auto\n", + "DIR_NAME: L6-D2560-E1e-1-ctx4k\n", + "NOTEBOOK_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k\n", + "INFERENCE_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", + "TRAINER_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", + "PROJECT_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer\n" + ] + } + ], + "source": [ + "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n", + "GPU_DEVICES=\"auto\"\n", + "ENABLE_WANDB=True\n", + "\n", + "# Layer count and embed dim to start with\n", + "LAYER_COUNT=6\n", + "EMBED_DIM=2560\n", + "\n", + "EMBED_SCALE=0.1\n", + "EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(\".\", \"_\")\n", + "\n", + "WANDB_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE}\"\n", + "FILENAME_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE_LABEL}\"\n", + "\n", + "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n", + "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", + 
"print(\"GPU_DEVICES:\", GPU_DEVICES)\n", + "\n", + "if ENABLE_WANDB:\n", + " WANDB_MODE=\"online\"\n", + "else:\n", + " WANDB_MODE=\"disabled\"\n", + "\n", + "# Computing the notebook, and various paths\n", + "import os\n", + "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", + "CONFIG_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../\"))\n", + "PROJECT_DIR=os.path.abspath(os.path.join(CONFIG_DIR, \"../../../../\"))\n", + "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "INFERENCE_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "\n", + "# Get the notebook dir name\n", + "DIR_NAME=os.path.basename(NOTEBOOK_DIR)\n", + "\n", + "# Log names and dir\n", + "print(\"DIR_NAME:\", DIR_NAME)\n", + "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", + "print(\"INFERENCE_DIR:\", INFERENCE_DIR)\n", + "print(\"TRAINER_DIR:\", TRAINER_DIR)\n", + "print(\"PROJECT_DIR:\", PROJECT_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "951b741e", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-14T00:21:51.833577Z", + "iopub.status.busy": "2023-09-14T00:21:51.833353Z", + "iopub.status.idle": "2023-09-14T00:23:39.537522Z", + "shell.execute_reply": "2023-09-14T00:23:39.536676Z" + }, + "papermill": { + "duration": 107.709983, + "end_time": "2023-09-14T00:23:39.539513", + "exception": false, + "start_time": "2023-09-14T00:21:51.829530", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2023-09-14 00:21:51-- https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-512.pth\r\n", + "Resolving huggingface.co (huggingface.co)... " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "13.33.33.55, 13.33.33.110, 13.33.33.102, ...\r\n", + "Connecting to huggingface.co (huggingface.co)|13.33.33.55|:443... 
connected.\r\n", + "HTTP request sent, awaiting response... " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "302 Found\r\n", + "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/facd3a8913710e7c17719547c55dcde02826ce2d592626c0339e42b394858498?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L6-D2560-E0_1-mem-ctx-512.pth%3B+filename%3D%22v5r3-L6-D2560-E0_1-mem-ctx-512.pth%22%3B&Expires=1694910112&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDkxMDExMn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkL2ZhY2QzYTg5MTM3MTBlN2MxNzcxOTU0N2M1NWRjZGUwMjgyNmNlMmQ1OTI2MjZjMDMzOWU0MmIzOTQ4NTg0OTg%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=WM4HZnIOKrH24paW4nOk1cHO9YHki8seMtQ6g3vGWI7sYyvPtz%7EXzbI4q%7EME0hvvhjAcVa1%7EUWwlWKF4I1ek7wHZOZ9ySyH0VaZ4HCTI0Zx9XlaT%7E62wMWO854tDrU5iHFVfMP59Rr%7EbQCkiwanrgwg5NC8iCw7uL5t2a-LvME3l0m65K5SzgC-0IEn4nVrXpnvdCmNaBNSNecwoP8yEYIv-0%7E-yeTK0j7dVnIifdmJY6pB4UiIPfOU--LckTIv8c%7EvvUtg4DWRMEspqC%7E%7EOquCGo3OAUgIZVvyhY9nzaWsJbQoRv3DyaWyuh8nKZW%7E8-99fEW8tfubjL3gqQBLqOw__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n", + "--2023-09-14 00:21:52-- 
https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/facd3a8913710e7c17719547c55dcde02826ce2d592626c0339e42b394858498?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L6-D2560-E0_1-mem-ctx-512.pth%3B+filename%3D%22v5r3-L6-D2560-E0_1-mem-ctx-512.pth%22%3B&Expires=1694910112&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDkxMDExMn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkL2ZhY2QzYTg5MTM3MTBlN2MxNzcxOTU0N2M1NWRjZGUwMjgyNmNlMmQ1OTI2MjZjMDMzOWU0MmIzOTQ4NTg0OTg%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=WM4HZnIOKrH24paW4nOk1cHO9YHki8seMtQ6g3vGWI7sYyvPtz%7EXzbI4q%7EME0hvvhjAcVa1%7EUWwlWKF4I1ek7wHZOZ9ySyH0VaZ4HCTI0Zx9XlaT%7E62wMWO854tDrU5iHFVfMP59Rr%7EbQCkiwanrgwg5NC8iCw7uL5t2a-LvME3l0m65K5SzgC-0IEn4nVrXpnvdCmNaBNSNecwoP8yEYIv-0%7E-yeTK0j7dVnIifdmJY6pB4UiIPfOU--LckTIv8c%7EvvUtg4DWRMEspqC%7E%7EOquCGo3OAUgIZVvyhY9nzaWsJbQoRv3DyaWyuh8nKZW%7E8-99fEW8tfubjL3gqQBLqOw__&Key-Pair-Id=KVTP0A1DKRTAX\r\n", + "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18.155.68.128, 18.155.68.94, 18.155.68.73, ...\r\n", + "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|18.155.68.128|:443... connected.\r\n", + "HTTP request sent, awaiting response... 
" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "200 OK\r\n", + "Length: 1537632513 (1.4G) [binary/octet-stream]\r\n", + "Saving to: β€˜v5r3-L6-D2560-E0_1-mem-ctx-512.pth’\r\n", + "\r\n", + "\r", + " v5r3-L6-D 0%[ ] 0 --.-KB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2 0%[ ] 18.27K 81.1KB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D25 0%[ ] 58.27K 129KB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D256 0%[ ] 135.27K 199KB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560 0%[ ] 296.27K 327KB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560- 0%[ ] 602.27K 532KB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E 0%[ ] 1.20M 907KB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0 0%[ ] 2.42M 1.52MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0_ 0%[ ] 4.85M 2.68MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0_1 0%[ ] 8.65M 4.24MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L6-D2560-E0_1- 0%[ ] 12.56M 5.53MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5r3-L6-D2560-E0_1-m 1%[ ] 16.43M 6.58MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "r3-L6-D2560-E0_1-me 1%[ ] 20.21M 7.41MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "3-L6-D2560-E0_1-mem 1%[ ] 24.07M 8.14MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-L6-D2560-E0_1-mem- 1%[ ] 27.84M 8.74MB/s eta 2m 
45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "L6-D2560-E0_1-mem-c 2%[ ] 31.63M 9.26MB/s eta 2m 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "6-D2560-E0_1-mem-ct 2%[ ] 35.51M 9.74MB/s eta 2m 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D2560-E0_1-mem-ctx 2%[ ] 39.34M 10.2MB/s eta 2m 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D2560-E0_1-mem-ctx- 2%[ ] 43.20M 10.5MB/s eta 2m 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2560-E0_1-mem-ctx-5 3%[ ] 47.09M 10.9MB/s eta 2m 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "560-E0_1-mem-ctx-51 3%[ ] 50.88M 11.2MB/s eta 2m 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "60-E0_1-mem-ctx-512 3%[ ] 54.60M 12.0MB/s eta 2m 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0-E0_1-mem-ctx-512. 
3%[ ] 58.48M 12.8MB/s eta 2m 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-E0_1-mem-ctx-512.p 4%[ ] 62.38M 13.6MB/s eta 2m 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "E0_1-mem-ctx-512.pt 4%[ ] 66.21M 14.4MB/s eta 1m 56s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0_1-mem-ctx-512.pth 4%[ ] 70.13M 15.2MB/s eta 1m 56s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "_1-mem-ctx-512.pth 5%[> ] 73.90M 15.9MB/s eta 1m 56s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "1-mem-ctx-512.pth 5%[> ] 77.82M 16.5MB/s eta 1m 56s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-mem-ctx-512.pth 5%[> ] 81.73M 16.8MB/s eta 1m 56s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "mem-ctx-512.pth 5%[> ] 85.54M 16.8MB/s eta 1m 47s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "em-ctx-512.pth 6%[> ] 89.46M 16.8MB/s eta 1m 47s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "m-ctx-512.pth 6%[> ] 93.34M 16.8MB/s eta 1m 47s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-ctx-512.pth 6%[> ] 97.13M 16.8MB/s eta 1m 47s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "ctx-512.pth 6%[> ] 101.04M 16.8MB/s eta 1m 47s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "tx-512.pth 7%[> ] 104.82M 16.8MB/s eta 1m 41s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "x-512.pth 7%[> ] 108.63M 16.8MB/s eta 1m 41s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-512.pth 7%[> ] 112.42M 16.8MB/s eta 1m 41s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + 
"512.pth 7%[> ] 116.21M 16.8MB/s eta 1m 41s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "12.pth 7%[> ] 117.20M 16.1MB/s eta 1m 41s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2.pth 8%[> ] 120.17M 15.9MB/s eta 1m 40s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + ".pth 8%[> ] 123.92M 15.9MB/s eta 1m 40s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "pth 8%[> ] 127.67M 15.9MB/s eta 1m 40s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "th 8%[> ] 130.81M 15.8MB/s eta 1m 40s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "h 9%[> ] 134.63M 15.8MB/s eta 1m 40s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " 9%[> ] 138.42M 15.8MB/s eta 97s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v 9%[> ] 142.31M 15.7MB/s eta 97s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5 9%[> ] 145.38M 15.6MB/s eta 97s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r 10%[=> ] 149.13M 15.6MB/s eta 97s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3 10%[=> ] 152.09M 15.4MB/s eta 97s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3- 10%[=> ] 155.88M 15.3MB/s eta 94s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L 10%[=> ] 159.67M 15.3MB/s eta 94s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6 11%[=> ] 162.78M 15.1MB/s eta 94s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6- 11%[=> ] 166.62M 15.2MB/s eta 94s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " 
v5r3-L6-D 11%[=> ] 170.42M 15.1MB/s eta 94s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2 11%[=> ] 174.28M 15.1MB/s eta 92s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D25 12%[=> ] 178.38M 15.2MB/s eta 92s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D256 12%[=> ] 182.15M 15.2MB/s eta 92s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560 12%[=> ] 185.90M 15.2MB/s eta 92s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560- 12%[=> ] 189.65M 15.8MB/s eta 92s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E 13%[=> ] 193.63M 16.0MB/s eta 89s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0 13%[=> ] 197.46M 16.0MB/s eta 89s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0_ 13%[=> ] 199.09M 15.5MB/s eta 89s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0_1 13%[=> ] 202.45M 15.5MB/s eta 89s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L6-D2560-E0_1- 14%[=> ] 206.15M 15.6MB/s eta 89s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5r3-L6-D2560-E0_1-m 14%[=> ] 209.87M 15.5MB/s eta 88s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "r3-L6-D2560-E0_1-me 14%[=> ] 213.65M 15.5MB/s eta 88s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "3-L6-D2560-E0_1-mem 14%[=> ] 217.40M 15.7MB/s eta 88s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-L6-D2560-E0_1-mem- 15%[==> ] 221.12M 15.7MB/s eta 88s " + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "\r", + "L6-D2560-E0_1-mem-c 15%[==> ] 224.95M 15.8MB/s eta 88s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "6-D2560-E0_1-mem-ct 15%[==> ] 228.85M 15.9MB/s eta 85s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D2560-E0_1-mem-ctx 15%[==> ] 232.63M 15.9MB/s eta 85s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D2560-E0_1-mem-ctx- 16%[==> ] 236.48M 16.0MB/s eta 85s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2560-E0_1-mem-ctx-5 16%[==> ] 240.31M 16.0MB/s eta 85s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "560-E0_1-mem-ctx-51 16%[==> ] 244.15M 16.0MB/s eta 85s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "60-E0_1-mem-ctx-512 16%[==> ] 247.99M 16.0MB/s eta 83s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0-E0_1-mem-ctx-512. 
17%[==> ] 251.73M 15.9MB/s eta 83s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-E0_1-mem-ctx-512.p 17%[==> ] 255.59M 16.0MB/s eta 83s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "E0_1-mem-ctx-512.pt 17%[==> ] 259.38M 16.0MB/s eta 83s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0_1-mem-ctx-512.pth 17%[==> ] 263.13M 16.0MB/s eta 83s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "_1-mem-ctx-512.pth 18%[==> ] 266.95M 15.9MB/s eta 81s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "1-mem-ctx-512.pth 18%[==> ] 270.76M 15.9MB/s eta 81s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-mem-ctx-512.pth 18%[==> ] 274.51M 16.4MB/s eta 81s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "mem-ctx-512.pth 18%[==> ] 278.38M 16.5MB/s eta 81s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "em-ctx-512.pth 19%[==> ] 282.12M 16.5MB/s eta 81s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "m-ctx-512.pth 19%[==> ] 285.85M 16.5MB/s eta 79s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-ctx-512.pth 19%[==> ] 289.57M 16.5MB/s eta 79s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "ctx-512.pth 20%[===> ] 293.38M 16.5MB/s eta 79s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "tx-512.pth 20%[===> ] 297.13M 16.6MB/s eta 79s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "x-512.pth 20%[===> ] 300.90M 16.5MB/s eta 79s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-512.pth 20%[===> ] 304.67M 16.5MB/s eta 78s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "\r", + "512.pth 21%[===> ] 308.38M 16.5MB/s eta 78s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "12.pth 21%[===> ] 312.13M 16.5MB/s eta 78s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2.pth 21%[===> ] 316.20M 16.5MB/s eta 78s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + ".pth 21%[===> ] 319.92M 16.5MB/s eta 78s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "pth 22%[===> ] 323.67M 16.5MB/s eta 76s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "th 22%[===> ] 327.51M 16.5MB/s eta 76s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "h 22%[===> ] 331.24M 16.5MB/s eta 76s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " 22%[===> ] 335.06M 16.5MB/s eta 76s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v 23%[===> ] 338.79M 16.5MB/s eta 76s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5 23%[===> ] 342.60M 16.5MB/s eta 74s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r 23%[===> ] 346.38M 16.5MB/s eta 74s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3 23%[===> ] 350.20M 16.5MB/s eta 74s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3- 24%[===> ] 354.12M 16.5MB/s eta 74s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L 24%[===> ] 357.88M 16.5MB/s eta 74s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6 24%[===> ] 361.63M 16.5MB/s eta 73s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6- 24%[===> ] 365.38M 16.5MB/s eta 73s " + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D 25%[====> ] 369.10M 16.5MB/s eta 73s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2 25%[====> ] 372.92M 16.5MB/s eta 73s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D25 25%[====> ] 376.79M 16.5MB/s eta 73s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D256 25%[====> ] 380.63M 16.5MB/s eta 71s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560 26%[====> ] 384.38M 16.5MB/s eta 71s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560- 26%[====> ] 387.63M 16.4MB/s eta 71s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E 26%[====> ] 388.07M 15.7MB/s eta 71s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0 26%[====> ] 391.90M 15.7MB/s eta 71s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0_ 26%[====> ] 394.49M 15.5MB/s eta 71s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0_1 27%[====> ] 397.45M 15.3MB/s eta 71s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L6-D2560-E0_1- 27%[====> ] 400.42M 15.1MB/s eta 71s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5r3-L6-D2560-E0_1-m 27%[====> ] 403.48M 15.0MB/s eta 71s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "r3-L6-D2560-E0_1-me 27%[====> ] 406.03M 14.7MB/s eta 71s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "3-L6-D2560-E0_1-mem 27%[====> ] 409.13M 14.6MB/s eta 70s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + 
"-L6-D2560-E0_1-mem- 28%[====> ] 412.31M 14.4MB/s eta 70s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "L6-D2560-E0_1-mem-c 28%[====> ] 415.51M 14.3MB/s eta 70s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "6-D2560-E0_1-mem-ct 28%[====> ] 418.73M 14.2MB/s eta 70s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D2560-E0_1-mem-ctx 28%[====> ] 421.73M 14.0MB/s eta 70s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D2560-E0_1-mem-ctx- 28%[====> ] 424.85M 13.8MB/s eta 70s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2560-E0_1-mem-ctx-5 29%[====> ] 428.06M 13.7MB/s eta 70s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "560-E0_1-mem-ctx-51 29%[====> ] 431.29M 13.6MB/s eta 70s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "60-E0_1-mem-ctx-512 29%[====> ] 434.60M 13.5MB/s eta 70s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0-E0_1-mem-ctx-512. 
29%[====> ] 437.81M 13.4MB/s eta 70s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-E0_1-mem-ctx-512.p 30%[=====> ] 441.10M 13.3MB/s eta 69s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "E0_1-mem-ctx-512.pt 30%[=====> ] 444.49M 13.2MB/s eta 69s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0_1-mem-ctx-512.pth 30%[=====> ] 447.85M 13.2MB/s eta 69s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "_1-mem-ctx-512.pth 30%[=====> ] 451.23M 13.8MB/s eta 69s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "1-mem-ctx-512.pth 31%[=====> ] 454.65M 13.8MB/s eta 69s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-mem-ctx-512.pth 31%[=====> ] 458.06M 13.9MB/s eta 68s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "mem-ctx-512.pth 31%[=====> ] 461.48M 14.0MB/s eta 68s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "em-ctx-512.pth 31%[=====> ] 464.90M 14.2MB/s eta 68s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "m-ctx-512.pth 31%[=====> ] 468.31M 14.2MB/s eta 68s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-ctx-512.pth 32%[=====> ] 471.76M 14.4MB/s eta 68s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "ctx-512.pth 32%[=====> ] 475.26M 14.4MB/s eta 66s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "tx-512.pth 32%[=====> ] 477.73M 14.3MB/s eta 66s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "x-512.pth 32%[=====> ] 481.17M 14.3MB/s eta 66s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-512.pth 33%[=====> ] 484.62M 14.4MB/s eta 66s " + ] + }, + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "\r", + "512.pth 33%[=====> ] 488.09M 14.5MB/s eta 66s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "12.pth 33%[=====> ] 491.59M 14.6MB/s eta 65s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2.pth 33%[=====> ] 495.10M 14.7MB/s eta 65s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + ".pth 33%[=====> ] 498.07M 14.6MB/s eta 65s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "pth 34%[=====> ] 501.29M 14.6MB/s eta 65s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "th 34%[=====> ] 504.70M 14.6MB/s eta 65s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "h 34%[=====> ] 508.23M 14.6MB/s eta 64s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " 34%[=====> ] 511.73M 14.7MB/s eta 64s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v 35%[======> ] 515.20M 14.7MB/s eta 64s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5 35%[======> ] 518.76M 14.8MB/s eta 64s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r 35%[======> ] 521.78M 14.7MB/s eta 64s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3 35%[======> ] 525.34M 14.7MB/s eta 63s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3- 36%[======> ] 528.79M 14.7MB/s eta 63s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L 36%[======> ] 532.29M 14.7MB/s eta 63s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6 36%[======> ] 535.76M 14.7MB/s eta 63s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6- 
36%[======> ] 539.28M 14.7MB/s eta 63s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D 36%[======> ] 541.87M 14.6MB/s eta 62s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2 37%[======> ] 545.32M 14.8MB/s eta 62s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D25 37%[======> ] 548.84M 14.8MB/s eta 62s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D256 37%[======> ] 552.34M 14.9MB/s eta 62s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560 37%[======> ] 555.78M 14.8MB/s eta 62s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560- 38%[======> ] 559.32M 14.9MB/s eta 61s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E 38%[======> ] 561.92M 14.6MB/s eta 61s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0 38%[======> ] 565.42M 14.8MB/s eta 61s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0_ 38%[======> ] 568.92M 14.9MB/s eta 61s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0_1 39%[======> ] 572.49M 14.9MB/s eta 61s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L6-D2560-E0_1- 39%[======> ] 575.17M 14.7MB/s eta 60s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5r3-L6-D2560-E0_1-m 39%[======> ] 578.63M 14.7MB/s eta 60s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "r3-L6-D2560-E0_1-me 39%[======> ] 582.20M 14.7MB/s eta 60s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "3-L6-D2560-E0_1-mem 39%[======> ] 585.71M 
14.7MB/s eta 60s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-L6-D2560-E0_1-mem- 40%[=======> ] 589.18M 14.8MB/s eta 60s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "L6-D2560-E0_1-mem-c 40%[=======> ] 592.70M 14.8MB/s eta 59s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "6-D2560-E0_1-mem-ct 40%[=======> ] 596.18M 14.8MB/s eta 59s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D2560-E0_1-mem-ctx 40%[=======> ] 599.78M 14.8MB/s eta 59s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D2560-E0_1-mem-ctx- 41%[=======> ] 602.70M 14.6MB/s eta 59s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2560-E0_1-mem-ctx-5 41%[=======> ] 606.18M 14.6MB/s eta 59s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "560-E0_1-mem-ctx-51 41%[=======> ] 609.79M 14.8MB/s eta 58s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "60-E0_1-mem-ctx-512 41%[=======> ] 613.31M 14.8MB/s eta 58s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0-E0_1-mem-ctx-512. 
42%[=======> ] 615.92M 14.6MB/s eta 58s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-E0_1-mem-ctx-512.p 42%[=======> ] 619.49M 14.6MB/s eta 58s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "E0_1-mem-ctx-512.pt 42%[=======> ] 623.07M 14.7MB/s eta 58s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0_1-mem-ctx-512.pth 42%[=======> ] 626.73M 14.7MB/s eta 56s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "_1-mem-ctx-512.pth 42%[=======> ] 630.40M 14.9MB/s eta 56s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "1-mem-ctx-512.pth 43%[=======> ] 634.06M 15.0MB/s eta 56s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-mem-ctx-512.pth 43%[=======> ] 637.68M 15.0MB/s eta 56s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "mem-ctx-512.pth 43%[=======> ] 641.40M 15.0MB/s eta 56s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "em-ctx-512.pth 43%[=======> ] 645.04M 15.3MB/s eta 55s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "m-ctx-512.pth 44%[=======> ] 648.71M 15.3MB/s eta 55s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-ctx-512.pth 44%[=======> ] 652.49M 15.4MB/s eta 55s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "ctx-512.pth 44%[=======> ] 656.26M 15.4MB/s eta 55s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "tx-512.pth 45%[========> ] 660.07M 15.3MB/s eta 55s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "x-512.pth 45%[========> ] 663.84M 15.4MB/s eta 54s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-512.pth 45%[========> ] 667.57M 15.4MB/s 
eta 54s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "512.pth 45%[========> ] 671.35M 15.4MB/s eta 54s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "12.pth 46%[========> ] 675.13M 15.7MB/s eta 54s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2.pth 46%[========> ] 678.79M 15.8MB/s eta 54s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + ".pth 46%[========> ] 682.62M 15.8MB/s eta 52s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "pth 46%[========> ] 686.34M 15.8MB/s eta 52s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "th 47%[========> ] 690.13M 16.1MB/s eta 52s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "h 47%[========> ] 693.85M 16.1MB/s eta 52s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " 47%[========> ] 697.71M 16.2MB/s eta 52s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v 47%[========> ] 701.46M 16.2MB/s eta 51s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5 48%[========> ] 705.26M 16.2MB/s eta 51s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r 48%[========> ] 709.01M 16.2MB/s eta 51s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3 48%[========> ] 712.85M 16.2MB/s eta 51s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3- 48%[========> ] 716.67M 16.2MB/s eta 51s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L 49%[========> ] 719.21M 16.0MB/s eta 50s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6 49%[========> ] 722.99M 16.0MB/s eta 50s " + ] + }, + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6- 49%[========> ] 726.73M 16.0MB/s eta 50s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D 49%[========> ] 730.45M 16.0MB/s eta 50s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2 50%[=========> ] 734.38M 16.2MB/s eta 50s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D25 50%[=========> ] 738.12M 16.2MB/s eta 48s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D256 50%[=========> ] 741.13M 16.0MB/s eta 48s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560 50%[=========> ] 741.59M 15.3MB/s eta 48s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560- 50%[=========> ] 745.43M 15.3MB/s eta 48s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E 51%[=========> ] 747.93M 15.0MB/s eta 48s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0 51%[=========> ] 750.78M 14.8MB/s eta 48s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0_ 51%[=========> ] 752.87M 14.4MB/s eta 48s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0_1 51%[=========> ] 755.03M 14.1MB/s eta 48s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L6-D2560-E0_1- 51%[=========> ] 757.21M 13.8MB/s eta 48s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5r3-L6-D2560-E0_1-m 51%[=========> ] 759.43M 13.4MB/s eta 48s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "r3-L6-D2560-E0_1-me 51%[=========> ] 761.67M 13.1MB/s eta 48s " + ] + }, + { 
+ "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "3-L6-D2560-E0_1-mem 52%[=========> ] 763.93M 12.8MB/s eta 48s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-L6-D2560-E0_1-mem- 52%[=========> ] 766.23M 12.5MB/s eta 48s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "L6-D2560-E0_1-mem-c 52%[=========> ] 768.54M 12.1MB/s eta 48s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "6-D2560-E0_1-mem-ct 52%[=========> ] 770.87M 11.8MB/s eta 48s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D2560-E0_1-mem-ctx 52%[=========> ] 773.23M 11.8MB/s eta 47s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D2560-E0_1-mem-ctx- 52%[=========> ] 775.60M 11.5MB/s eta 47s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2560-E0_1-mem-ctx-5 53%[=========> ] 777.99M 11.2MB/s eta 47s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "560-E0_1-mem-ctx-51 53%[=========> ] 780.40M 10.9MB/s eta 47s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "60-E0_1-mem-ctx-512 53%[=========> ] 782.82M 10.6MB/s eta 47s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0-E0_1-mem-ctx-512. 
53%[=========> ] 785.24M 10.3MB/s eta 46s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-E0_1-mem-ctx-512.p 53%[=========> ] 787.70M 10.2MB/s eta 46s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "E0_1-mem-ctx-512.pt 53%[=========> ] 790.15M 10.6MB/s eta 46s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0_1-mem-ctx-512.pth 54%[=========> ] 792.63M 10.3MB/s eta 46s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "_1-mem-ctx-512.pth 54%[=========> ] 795.10M 10.4MB/s eta 46s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "1-mem-ctx-512.pth 54%[=========> ] 797.60M 10.3MB/s eta 46s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-mem-ctx-512.pth 54%[=========> ] 800.10M 10.4MB/s eta 46s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "mem-ctx-512.pth 54%[=========> ] 802.60M 10.5MB/s eta 46s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "em-ctx-512.pth 54%[=========> ] 805.12M 10.6MB/s eta 46s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "m-ctx-512.pth 55%[==========> ] 807.63M 10.6MB/s eta 46s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-ctx-512.pth 55%[==========> ] 810.17M 10.7MB/s eta 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "ctx-512.pth 55%[==========> ] 812.70M 10.8MB/s eta 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "tx-512.pth 55%[==========> ] 815.23M 10.8MB/s eta 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "x-512.pth 55%[==========> ] 817.76M 10.9MB/s eta 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-512.pth 
55%[==========> ] 820.31M 10.9MB/s eta 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "512.pth 56%[==========> ] 822.85M 11.0MB/s eta 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "12.pth 56%[==========> ] 825.40M 11.0MB/s eta 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2.pth 56%[==========> ] 827.95M 11.0MB/s eta 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + ".pth 56%[==========> ] 830.48M 11.1MB/s eta 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "pth 56%[==========> ] 833.04M 11.1MB/s eta 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "th 56%[==========> ] 835.59M 11.1MB/s eta 44s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "h 57%[==========> ] 838.15M 11.2MB/s eta 44s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " 57%[==========> ] 840.71M 11.2MB/s eta 44s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v 57%[==========> ] 843.26M 11.2MB/s eta 44s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5 57%[==========> ] 845.81M 11.2MB/s eta 44s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r 57%[==========> ] 848.37M 11.2MB/s eta 43s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3 58%[==========> ] 850.92M 11.2MB/s eta 43s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3- 58%[==========> ] 853.46M 11.2MB/s eta 43s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L 58%[==========> ] 856.03M 11.3MB/s eta 43s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6 
58%[==========> ] 858.57M 11.3MB/s eta 43s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6- 58%[==========> ] 861.12M 11.3MB/s eta 42s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D 58%[==========> ] 863.68M 11.3MB/s eta 42s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2 59%[==========> ] 866.23M 11.3MB/s eta 42s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D25 59%[==========> ] 868.79M 11.3MB/s eta 42s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D256 59%[==========> ] 871.34M 11.3MB/s eta 42s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560 59%[==========> ] 873.90M 11.1MB/s eta 42s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560- 59%[==========> ] 876.43M 10.8MB/s eta 42s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E 59%[==========> ] 878.99M 11.3MB/s eta 42s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0 60%[===========> ] 881.56M 11.1MB/s eta 42s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0_ 60%[===========> ] 883.37M 10.7MB/s eta 42s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0_1 60%[===========> ] 887.10M 10.9MB/s eta 41s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L6-D2560-E0_1- 60%[===========> ] 888.93M 10.6MB/s eta 41s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5r3-L6-D2560-E0_1-m 60%[===========> ] 890.81M 10.9MB/s eta 41s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", 
+ "r3-L6-D2560-E0_1-me 60%[===========> ] 892.73M 10.6MB/s eta 41s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "3-L6-D2560-E0_1-mem 61%[===========> ] 894.70M 10.3MB/s eta 41s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-L6-D2560-E0_1-mem- 61%[===========> ] 896.70M 10.2MB/s eta 41s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "L6-D2560-E0_1-mem-c 61%[===========> ] 898.54M 10.2MB/s eta 41s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "6-D2560-E0_1-mem-ct 61%[===========> ] 899.99M 9.85MB/s eta 41s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D2560-E0_1-mem-ctx 61%[===========> ] 902.09M 9.77MB/s eta 41s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D2560-E0_1-mem-ctx- 61%[===========> ] 904.21M 9.95MB/s eta 41s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2560-E0_1-mem-ctx-5 61%[===========> ] 906.38M 9.73MB/s eta 40s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "560-E0_1-mem-ctx-51 61%[===========> ] 908.57M 9.56MB/s eta 40s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "60-E0_1-mem-ctx-512 62%[===========> ] 910.79M 9.35MB/s eta 40s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0-E0_1-mem-ctx-512. 
62%[===========> ] 913.04M 9.35MB/s eta 40s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-E0_1-mem-ctx-512.p 62%[===========> ] 915.31M 9.43MB/s eta 40s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "E0_1-mem-ctx-512.pt 62%[===========> ] 917.60M 9.34MB/s eta 40s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0_1-mem-ctx-512.pth 62%[===========> ] 919.93M 9.62MB/s eta 40s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "_1-mem-ctx-512.pth 62%[===========> ] 922.26M 9.32MB/s eta 40s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "1-mem-ctx-512.pth 63%[===========> ] 924.62M 9.54MB/s eta 40s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-mem-ctx-512.pth 63%[===========> ] 927.01M 9.54MB/s eta 40s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "mem-ctx-512.pth 63%[===========> ] 929.40M 9.66MB/s eta 39s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "em-ctx-512.pth 63%[===========> ] 931.81M 9.83MB/s eta 39s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "m-ctx-512.pth 63%[===========> ] 934.23M 10.0MB/s eta 39s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-ctx-512.pth 63%[===========> ] 936.68M 9.99MB/s eta 39s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "ctx-512.pth 64%[===========> ] 939.13M 10.1MB/s eta 39s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "tx-512.pth 64%[===========> ] 941.60M 10.1MB/s eta 38s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "x-512.pth 64%[===========> ] 944.09M 10.3MB/s eta 38s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ 
+ "\r", + "-512.pth 64%[===========> ] 946.57M 10.5MB/s eta 38s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "512.pth 64%[===========> ] 949.07M 10.3MB/s eta 38s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "12.pth 64%[===========> ] 951.57M 10.5MB/s eta 38s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2.pth 65%[============> ] 954.09M 10.7MB/s eta 37s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + ".pth 65%[============> ] 956.54M 10.8MB/s eta 37s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "pth 65%[============> ] 959.06M 10.6MB/s eta 37s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "th 65%[============> ] 961.60M 10.6MB/s eta 37s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "h 65%[============> ] 964.13M 10.8MB/s eta 37s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " 65%[============> ] 966.70M 10.9MB/s eta 36s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v 66%[============> ] 969.24M 10.8MB/s eta 36s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5 66%[============> ] 971.79M 10.7MB/s eta 36s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r 66%[============> ] 974.35M 10.9MB/s eta 36s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3 66%[============> ] 976.92M 11.1MB/s eta 36s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3- 66%[============> ] 979.48M 11.1MB/s eta 36s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L 66%[============> ] 982.04M 10.8MB/s eta 36s " + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "\r", + " v5r3-L6 67%[============> ] 984.60M 11.0MB/s eta 36s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6- 67%[============> ] 987.18M 11.2MB/s eta 36s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D 67%[============> ] 989.74M 11.1MB/s eta 36s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2 67%[============> ] 992.31M 11.1MB/s eta 35s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D25 67%[============> ] 994.88M 11.2MB/s eta 35s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D256 68%[============> ] 997.45M 11.1MB/s eta 35s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560 68%[============> ] 1000M 11.1MB/s eta 35s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560- 68%[============> ] 1003M 11.3MB/s eta 35s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E 68%[============> ] 1005M 11.2MB/s eta 34s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0 68%[============> ] 1008M 11.3MB/s eta 34s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0_ 68%[============> ] 1010M 11.2MB/s eta 34s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0_1 69%[============> ] 1013M 11.2MB/s eta 34s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L6-D2560-E0_1- 69%[============> ] 1015M 11.2MB/s eta 34s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5r3-L6-D2560-E0_1-m 69%[============> ] 1018M 11.2MB/s eta 33s " + ] + }, + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "r3-L6-D2560-E0_1-me 69%[============> ] 1021M 11.0MB/s eta 33s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "3-L6-D2560-E0_1-mem 69%[============> ] 1023M 11.2MB/s eta 33s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-L6-D2560-E0_1-mem- 69%[============> ] 1.00G 11.3MB/s eta 33s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "L6-D2560-E0_1-mem-c 70%[=============> ] 1.00G 11.2MB/s eta 33s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "6-D2560-E0_1-mem-ct 70%[=============> ] 1.01G 11.0MB/s eta 32s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D2560-E0_1-mem-ctx 70%[=============> ] 1.01G 11.2MB/s eta 32s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D2560-E0_1-mem-ctx- 70%[=============> ] 1.01G 11.2MB/s eta 32s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2560-E0_1-mem-ctx-5 70%[=============> ] 1.01G 11.0MB/s eta 32s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "560-E0_1-mem-ctx-51 70%[=============> ] 1.02G 11.2MB/s eta 32s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "60-E0_1-mem-ctx-512 71%[=============> ] 1.02G 11.4MB/s eta 31s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0-E0_1-mem-ctx-512. 
71%[=============> ] 1.02G 11.4MB/s eta 31s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-E0_1-mem-ctx-512.p 71%[=============> ] 1.02G 11.2MB/s eta 31s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "E0_1-mem-ctx-512.pt 71%[=============> ] 1.03G 11.3MB/s eta 31s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0_1-mem-ctx-512.pth 71%[=============> ] 1.03G 11.4MB/s eta 31s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "_1-mem-ctx-512.pth 72%[=============> ] 1.03G 11.4MB/s eta 30s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "1-mem-ctx-512.pth 72%[=============> ] 1.03G 11.4MB/s eta 30s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-mem-ctx-512.pth 72%[=============> ] 1.04G 11.4MB/s eta 30s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "mem-ctx-512.pth 72%[=============> ] 1.04G 11.3MB/s eta 30s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "em-ctx-512.pth 72%[=============> ] 1.04G 11.1MB/s eta 30s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "m-ctx-512.pth 72%[=============> ] 1.04G 11.5MB/s eta 30s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-ctx-512.pth 73%[=============> ] 1.05G 11.3MB/s eta 30s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "ctx-512.pth 73%[=============> ] 1.05G 11.2MB/s eta 30s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "tx-512.pth 73%[=============> ] 1.05G 11.4MB/s eta 30s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "x-512.pth 73%[=============> ] 1.05G 11.4MB/s eta 30s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ 
+ "\r", + "-512.pth 73%[=============> ] 1.06G 11.3MB/s eta 29s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "512.pth 74%[=============> ] 1.06G 11.5MB/s eta 29s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "12.pth 74%[=============> ] 1.06G 11.7MB/s eta 29s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2.pth 74%[=============> ] 1.07G 11.6MB/s eta 29s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + ".pth 74%[=============> ] 1.07G 11.4MB/s eta 29s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "pth 74%[=============> ] 1.07G 11.6MB/s eta 28s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "th 75%[==============> ] 1.07G 11.9MB/s eta 28s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "h 75%[==============> ] 1.08G 11.9MB/s eta 28s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " 75%[==============> ] 1.08G 11.9MB/s eta 28s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v 75%[==============> ] 1.08G 11.9MB/s eta 28s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5 75%[==============> ] 1.08G 12.0MB/s eta 27s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r 75%[==============> ] 1.09G 12.0MB/s eta 27s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3 76%[==============> ] 1.09G 11.9MB/s eta 27s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3- 76%[==============> ] 1.09G 12.2MB/s eta 27s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L 76%[==============> ] 1.10G 12.4MB/s eta 27s " + ] + }, + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "\r", + " v5r3-L6 76%[==============> ] 1.10G 12.5MB/s eta 25s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6- 77%[==============> ] 1.10G 12.4MB/s eta 25s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D 77%[==============> ] 1.11G 12.3MB/s eta 25s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2 77%[==============> ] 1.11G 12.7MB/s eta 25s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D25 77%[==============> ] 1.11G 12.9MB/s eta 25s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D256 77%[==============> ] 1.12G 13.0MB/s eta 24s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560 78%[==============> ] 1.12G 12.9MB/s eta 24s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560- 78%[==============> ] 1.12G 13.1MB/s eta 24s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E 78%[==============> ] 1.12G 13.2MB/s eta 24s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0 78%[==============> ] 1.13G 13.1MB/s eta 24s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0_ 79%[==============> ] 1.13G 13.5MB/s eta 23s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0_1 79%[==============> ] 1.14G 13.7MB/s eta 23s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L6-D2560-E0_1- 79%[==============> ] 1.14G 13.9MB/s eta 23s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5r3-L6-D2560-E0_1-m 79%[==============> ] 1.14G 14.1MB/s eta 23s " + ] + }, + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "r3-L6-D2560-E0_1-me 80%[===============> ] 1.15G 14.3MB/s eta 23s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "3-L6-D2560-E0_1-mem 80%[===============> ] 1.15G 14.4MB/s eta 21s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-L6-D2560-E0_1-mem- 80%[===============> ] 1.15G 14.5MB/s eta 21s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "L6-D2560-E0_1-mem-c 80%[===============> ] 1.16G 14.7MB/s eta 21s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "6-D2560-E0_1-mem-ct 81%[===============> ] 1.16G 14.9MB/s eta 21s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D2560-E0_1-mem-ctx 81%[===============> ] 1.17G 15.0MB/s eta 21s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D2560-E0_1-mem-ctx- 81%[===============> ] 1.17G 15.2MB/s eta 20s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2560-E0_1-mem-ctx-5 81%[===============> ] 1.17G 15.3MB/s eta 20s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "560-E0_1-mem-ctx-51 82%[===============> ] 1.18G 15.4MB/s eta 20s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "60-E0_1-mem-ctx-512 82%[===============> ] 1.18G 15.5MB/s eta 20s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0-E0_1-mem-ctx-512. 
82%[===============> ] 1.18G 15.7MB/s eta 20s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-E0_1-mem-ctx-512.p 82%[===============> ] 1.19G 15.8MB/s eta 19s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "E0_1-mem-ctx-512.pt 83%[===============> ] 1.19G 16.1MB/s eta 19s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0_1-mem-ctx-512.pth 83%[===============> ] 1.19G 16.2MB/s eta 19s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "_1-mem-ctx-512.pth 83%[===============> ] 1.20G 16.3MB/s eta 19s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "1-mem-ctx-512.pth 83%[===============> ] 1.20G 16.4MB/s eta 19s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-mem-ctx-512.pth 84%[===============> ] 1.21G 16.4MB/s eta 17s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "mem-ctx-512.pth 84%[===============> ] 1.21G 16.4MB/s eta 17s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "em-ctx-512.pth 84%[===============> ] 1.21G 16.4MB/s eta 17s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "m-ctx-512.pth 84%[===============> ] 1.22G 16.4MB/s eta 17s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-ctx-512.pth 85%[================> ] 1.22G 16.5MB/s eta 17s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "ctx-512.pth 85%[================> ] 1.22G 16.5MB/s eta 16s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "tx-512.pth 85%[================> ] 1.23G 16.5MB/s eta 16s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "x-512.pth 86%[================> ] 1.23G 16.5MB/s eta 16s " + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "\r", + "-512.pth 86%[================> ] 1.24G 16.5MB/s eta 16s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "512.pth 86%[================> ] 1.24G 16.5MB/s eta 16s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "12.pth 86%[================> ] 1.24G 16.5MB/s eta 14s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2.pth 87%[================> ] 1.25G 16.5MB/s eta 14s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + ".pth 87%[================> ] 1.25G 16.5MB/s eta 14s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "pth 87%[================> ] 1.25G 16.5MB/s eta 14s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "th 87%[================> ] 1.26G 16.6MB/s eta 14s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "h 88%[================> ] 1.26G 16.5MB/s eta 13s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " 88%[================> ] 1.26G 16.5MB/s eta 13s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v 88%[================> ] 1.27G 16.5MB/s eta 13s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5 88%[================> ] 1.27G 16.5MB/s eta 13s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r 89%[================> ] 1.28G 16.5MB/s eta 13s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3 89%[================> ] 1.28G 16.5MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3- 89%[================> ] 1.28G 16.5MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L 89%[================> ] 
1.28G 16.0MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6 89%[================> ] 1.29G 15.8MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6- 90%[=================> ] 1.29G 15.6MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D 90%[=================> ] 1.29G 15.5MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2 90%[=================> ] 1.30G 15.3MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D25 90%[=================> ] 1.30G 15.2MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D256 90%[=================> ] 1.30G 15.1MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560 91%[=================> ] 1.31G 15.0MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560- 91%[=================> ] 1.31G 14.9MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E 91%[=================> ] 1.31G 14.8MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0 91%[=================> ] 1.32G 14.7MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0_ 92%[=================> ] 1.32G 14.6MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2560-E0_1 92%[=================> ] 1.32G 14.6MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L6-D2560-E0_1- 92%[=================> ] 1.33G 14.3MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "\r", + "5r3-L6-D2560-E0_1-m 92%[=================> ] 1.33G 14.3MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "r3-L6-D2560-E0_1-me 93%[=================> ] 1.33G 14.3MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "3-L6-D2560-E0_1-mem 93%[=================> ] 1.34G 14.3MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-L6-D2560-E0_1-mem- 93%[=================> ] 1.34G 14.2MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "L6-D2560-E0_1-mem-c 93%[=================> ] 1.34G 14.2MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "6-D2560-E0_1-mem-ct 94%[=================> ] 1.35G 14.2MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D2560-E0_1-mem-ctx 94%[=================> ] 1.35G 14.8MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D2560-E0_1-mem-ctx- 94%[=================> ] 1.35G 15.0MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2560-E0_1-mem-ctx-5 94%[=================> ] 1.36G 15.2MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "560-E0_1-mem-ctx-51 95%[==================> ] 1.36G 15.3MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "60-E0_1-mem-ctx-512 95%[==================> ] 1.37G 15.4MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0-E0_1-mem-ctx-512. 
95%[==================> ] 1.37G 15.5MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-E0_1-mem-ctx-512.p 95%[==================> ] 1.37G 15.7MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "E0_1-mem-ctx-512.pt 96%[==================> ] 1.38G 15.8MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0_1-mem-ctx-512.pth 96%[==================> ] 1.38G 15.9MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "_1-mem-ctx-512.pth 96%[==================> ] 1.38G 16.0MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "1-mem-ctx-512.pth 96%[==================> ] 1.39G 16.0MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-mem-ctx-512.pth 97%[==================> ] 1.39G 16.2MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "mem-ctx-512.pth 97%[==================> ] 1.40G 16.2MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "em-ctx-512.pth 97%[==================> ] 1.40G 16.4MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "m-ctx-512.pth 97%[==================> ] 1.40G 16.5MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-ctx-512.pth 98%[==================> ] 1.41G 16.5MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "ctx-512.pth 98%[==================> ] 1.41G 16.5MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "tx-512.pth 98%[==================> ] 1.41G 16.5MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "x-512.pth 99%[==================> ] 1.42G 16.6MB/s eta 1s " + ] + }, + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-512.pth 99%[==================> ] 1.42G 16.5MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "512.pth 99%[==================> ] 1.42G 16.5MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "12.pth 99%[==================> ] 1.43G 16.5MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L6-D2560-E0_1- 100%[===================>] 1.43G 17.1MB/s in 1m 46s \r\n", + "\r\n", + "2023-09-14 00:23:39 (13.9 MB/s) - β€˜v5r3-L6-D2560-E0_1-mem-ctx-512.pth’ saved [1537632513/1537632513]\r\n", + "\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 1.5G\r\n", + "drwxr-xr-x 2 root root 3 Sep 14 00:21 .\r\n", + "drwxr-xr-x 20 root root 24 Sep 14 00:21 ..\r\n", + "-rw-r--r-- 1 root root 1.5G Sep 13 12:45 v5r3-L6-D2560-E0_1-mem-ctx-512.pth\r\n" + ] + } + ], + "source": [ + "# Download the model directly (stop gap till HF sync issues is resolved)\n", + "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n", + " wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/{DIR_NAME}/{FILENAME_PREFIX}-mem-ctx-512.pth\"\n", + "\n", + "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n", + " ls -alh ." + ] + }, + { + "cell_type": "markdown", + "id": "44993c1b", + "metadata": { + "papermill": { + "duration": 0.040583, + "end_time": "2023-09-14T00:23:39.603420", + "exception": false, + "start_time": "2023-09-14T00:23:39.562837", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Tune 3 : Ramping up the ctx size (8192), memory training\n", + "\n", + "- Tune 3: Large ctx size (8192), Scaling up!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "3d8f956f", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-14T00:23:39.641463Z", + "iopub.status.busy": "2023-09-14T00:23:39.641175Z", + "iopub.status.idle": "2023-09-14T00:24:02.625663Z", + "shell.execute_reply": "2023-09-14T00:24:02.625201Z" + }, + "papermill": { + "duration": 23.023454, + "end_time": "2023-09-14T00:24:02.645057", + "exception": false, + "start_time": "2023-09-14T00:23:39.621603", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Generating word reptition dataset ##\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 10 max words, 100 samples - at ../dataset/gen-word-10-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 30 max words, 100 samples - at ../dataset/gen-word-30-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 20 max words, 100 samples - at ../dataset/gen-word-20-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 5 max words, 100 samples - at ../dataset/gen-word-5-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 15 max words, 100 samples - at ../dataset/gen-word-15-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 35 max words, 100 samples - at ../dataset/gen-word-35-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 25 max words, 100 samples - at ../dataset/gen-word-25-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 70 max words, 100 samples - at 
../dataset/gen-word-70-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 80 max words, 100 samples - at ../dataset/gen-word-80-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 90 max words, 100 samples - at ../dataset/gen-word-90-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 55 max words, 100 samples - at ../dataset/gen-word-55-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 60 max words, 100 samples - at ../dataset/gen-word-60-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 45 max words, 100 samples - at ../dataset/gen-word-45-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 65 max words, 100 samples - at ../dataset/gen-word-65-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 100 max words, 100 samples - at ../dataset/gen-word-100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 75 max words, 100 samples - at ../dataset/gen-word-75-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 50 max words, 100 samples - at ../dataset/gen-word-50-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 85 max words, 100 samples - at ../dataset/gen-word-85-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 95 max words, 100 samples - at ../dataset/gen-word-95-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "Generated a single JSONL file with 357 samples (10 token repeat) - 75 max words - at ../dataset/shuffle-word-75-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 328 samples (10 token repeat) - 80 max words - at ../dataset/shuffle-word-80-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 277 samples (10 token repeat) - 95 max words - at ../dataset/shuffle-word-95-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 383 samples (10 token repeat) - 70 max words - at ../dataset/shuffle-word-70-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 300 samples (10 token repeat) - 90 max words - at ../dataset/shuffle-word-90-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 657 samples (10 token repeat) - 40 max words - at ../dataset/shuffle-word-40-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 746 samples (10 token repeat) - 35 max words - at ../dataset/shuffle-word-35-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 479 samples (10 token repeat) - 55 max words - at ../dataset/shuffle-word-55-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 438 samples (10 token repeat) - 60 max words - at ../dataset/shuffle-word-60-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 1057 samples (10 token repeat) - 25 max words - at ../dataset/shuffle-word-25-count.jsonl\n" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "Generated a single JSONL file with 586 samples (10 token repeat) - 45 max words - at ../dataset/shuffle-word-45-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 1300 samples (10 token repeat) - 20 max words - at ../dataset/shuffle-word-20-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 317 samples (10 token repeat) - 85 max words - at ../dataset/shuffle-word-85-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 527 samples (10 token repeat) - 50 max words - at ../dataset/shuffle-word-50-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 873 samples (10 token repeat) - 30 max words - at ../dataset/shuffle-word-30-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 262 samples (10 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 117 samples (20 token repeat) - 500 max words - at ../dataset/shuffle-word-500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 180 samples (20 token repeat) - 300 max words - at ../dataset/shuffle-word-300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 410 samples (10 token repeat) - 65 max words - at ../dataset/shuffle-word-65-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 79 samples (20 token repeat) - 800 max words - at ../dataset/shuffle-word-800-count.jsonl\n" + ] + 
}, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 6600 max words - at ../dataset/shuffle-word-6600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 60 samples (20 token repeat) - 1100 max words - at ../dataset/shuffle-word-1100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 63 samples (20 token repeat) - 900 max words - at ../dataset/shuffle-word-900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 3000 max words - at ../dataset/shuffle-word-3000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 40 max words, 100 samples - at ../dataset/gen-word-40-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 3600 max words - at ../dataset/shuffle-word-3600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 524 samples (20 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 3300 max words - at ../dataset/shuffle-word-3300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 1765 samples (10 token repeat) - 15 max words - at ../dataset/shuffle-word-15-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 2594 samples (10 token repeat) - 10 max words - at 
../dataset/shuffle-word-10-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 7400 max words - at ../dataset/shuffle-word-7400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 270 samples (20 token repeat) - 200 max words - at ../dataset/shuffle-word-200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 40 samples (20 token repeat) - 1500 max words - at ../dataset/shuffle-word-1500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 80 samples (20 token repeat) - 700 max words - at ../dataset/shuffle-word-700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 3400 max words - at ../dataset/shuffle-word-3400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 60 samples (20 token repeat) - 1200 max words - at ../dataset/shuffle-word-1200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 40 samples (20 token repeat) - 1600 max words - at ../dataset/shuffle-word-1600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 99 samples (20 token repeat) - 600 max words - at ../dataset/shuffle-word-600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 40 samples (20 token repeat) - 2300 max words - at ../dataset/shuffle-word-2300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 
samples (20 token repeat) - 3700 max words - at ../dataset/shuffle-word-3700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 40 samples (20 token repeat) - 2200 max words - at ../dataset/shuffle-word-2200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 60 samples (20 token repeat) - 1000 max words - at ../dataset/shuffle-word-1000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 139 samples (20 token repeat) - 400 max words - at ../dataset/shuffle-word-400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 2800 max words - at ../dataset/shuffle-word-2800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 7800 max words - at ../dataset/shuffle-word-7800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 3100 max words - at ../dataset/shuffle-word-3100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 40 samples (20 token repeat) - 1900 max words - at ../dataset/shuffle-word-1900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 3200 max words - at ../dataset/shuffle-word-3200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 40 samples (20 token repeat) - 2000 max words - at ../dataset/shuffle-word-2000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "Generated a single JSONL file with 40 samples (20 token repeat) - 1800 max words - at ../dataset/shuffle-word-1800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 6000 max words - at ../dataset/shuffle-word-6000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 6700 max words - at ../dataset/shuffle-word-6700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 2900 max words - at ../dataset/shuffle-word-2900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 5400 max words - at ../dataset/shuffle-word-5400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 5900 max words - at ../dataset/shuffle-word-5900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 40 samples (20 token repeat) - 2100 max words - at ../dataset/shuffle-word-2100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 4100 max words - at ../dataset/shuffle-word-4100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 5700 max words - at ../dataset/shuffle-word-5700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 6100 max words - at ../dataset/shuffle-word-6100-count.jsonl\n" + ] + }, + 
{ + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 3500 max words - at ../dataset/shuffle-word-3500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 5600 max words - at ../dataset/shuffle-word-5600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 27 samples (20 token repeat) - 2600 max words - at ../dataset/shuffle-word-2600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 55 samples (20 token repeat) - 1300 max words - at ../dataset/shuffle-word-1300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 6300 max words - at ../dataset/shuffle-word-6300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 4800 max words - at ../dataset/shuffle-word-4800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 40 samples (20 token repeat) - 1700 max words - at ../dataset/shuffle-word-1700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 4400 max words - at ../dataset/shuffle-word-4400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 4300 max words - at ../dataset/shuffle-word-4300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 6800 max words - at 
../dataset/shuffle-word-6800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 5000 max words - at ../dataset/shuffle-word-5000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 5500 max words - at ../dataset/shuffle-word-5500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 7100 max words - at ../dataset/shuffle-word-7100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 5100 max words - at ../dataset/shuffle-word-5100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 4000 max words - at ../dataset/shuffle-word-4000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 3900 max words - at ../dataset/shuffle-word-3900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 7600 max words - at ../dataset/shuffle-word-7600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 6200 max words - at ../dataset/shuffle-word-6200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 5200 max words - at ../dataset/shuffle-word-5200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 
25 samples (20 token repeat) - 2700 max words - at ../dataset/shuffle-word-2700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 4600 max words - at ../dataset/shuffle-word-4600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 4900 max words - at ../dataset/shuffle-word-4900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 7700 max words - at ../dataset/shuffle-word-7700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 6400 max words - at ../dataset/shuffle-word-6400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 7900 max words - at ../dataset/shuffle-word-7900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 5800 max words - at ../dataset/shuffle-word-5800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 7500 max words - at ../dataset/shuffle-word-7500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 7200 max words - at ../dataset/shuffle-word-7200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 40 samples (20 token repeat) - 1400 max words - at ../dataset/shuffle-word-1400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", 
+ "text": [ + "Generated a single JSONL file with 39 samples (20 token repeat) - 2500 max words - at ../dataset/shuffle-word-2500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 3800 max words - at ../dataset/shuffle-word-3800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 40 samples (20 token repeat) - 2400 max words - at ../dataset/shuffle-word-2400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 8000 max words - at ../dataset/shuffle-word-8000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 7000 max words - at ../dataset/shuffle-word-7000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 5535 samples (10 token repeat) - 5 max words - at ../dataset/shuffle-word-5-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 4200 max words - at ../dataset/shuffle-word-4200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 4500 max words - at ../dataset/shuffle-word-4500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 4700 max words - at ../dataset/shuffle-word-4700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 6500 max words - at ../dataset/shuffle-word-6500-count.jsonl\n" + ] + }, + { 
+ "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 7300 max words - at ../dataset/shuffle-word-7300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 5300 max words - at ../dataset/shuffle-word-5300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated a single JSONL file with 20 samples (20 token repeat) - 6900 max words - at ../dataset/shuffle-word-6900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 100 max words, 2000 samples - at ../dataset/gen-word-100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 200 max words, 2000 samples - at ../dataset/gen-word-200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 300 max words, 2000 samples - at ../dataset/gen-word-300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 500 max words, 2000 samples - at ../dataset/gen-word-500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 400 max words, 2000 samples - at ../dataset/gen-word-400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 700 max words, 2000 samples - at ../dataset/gen-word-700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 600 max words, 2000 samples - at ../dataset/gen-word-600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 1000 max words, 2000 samples - at 
../dataset/gen-word-1000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 1800 max words, 2000 samples - at ../dataset/gen-word-1800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 800 max words, 2000 samples - at ../dataset/gen-word-800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 900 max words, 2000 samples - at ../dataset/gen-word-900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 1300 max words, 2000 samples - at ../dataset/gen-word-1300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 1200 max words, 2000 samples - at ../dataset/gen-word-1200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 1100 max words, 2000 samples - at ../dataset/gen-word-1100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 1500 max words, 2000 samples - at ../dataset/gen-word-1500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 1400 max words, 2000 samples - at ../dataset/gen-word-1400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 2000 max words, 2000 samples - at ../dataset/gen-word-2000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 1700 max words, 2000 samples - at ../dataset/gen-word-1700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 1600 max words, 2000 samples - at ../dataset/gen-word-1600-count.jsonl\n" + ] + }, + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 2200 max words, 2000 samples - at ../dataset/gen-word-2200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 2100 max words, 2000 samples - at ../dataset/gen-word-2100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 1900 max words, 2000 samples - at ../dataset/gen-word-1900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 3000 max words, 2000 samples - at ../dataset/gen-word-3000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 2400 max words, 2000 samples - at ../dataset/gen-word-2400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 2500 max words, 2000 samples - at ../dataset/gen-word-2500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 2700 max words, 2000 samples - at ../dataset/gen-word-2700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 2300 max words, 2000 samples - at ../dataset/gen-word-2300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 2800 max words, 2000 samples - at ../dataset/gen-word-2800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 2600 max words, 2000 samples - at ../dataset/gen-word-2600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 2900 max words, 2000 samples - at ../dataset/gen-word-2900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "Generated JSONL file with - 4100 max words, 2000 samples - at ../dataset/gen-word-4100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 4000 max words, 2000 samples - at ../dataset/gen-word-4000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 3600 max words, 2000 samples - at ../dataset/gen-word-3600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 3100 max words, 2000 samples - at ../dataset/gen-word-3100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 3400 max words, 2000 samples - at ../dataset/gen-word-3400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 3800 max words, 2000 samples - at ../dataset/gen-word-3800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 3500 max words, 2000 samples - at ../dataset/gen-word-3500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 3300 max words, 2000 samples - at ../dataset/gen-word-3300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 3200 max words, 2000 samples - at ../dataset/gen-word-3200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 3700 max words, 2000 samples - at ../dataset/gen-word-3700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 3900 max words, 2000 samples - at ../dataset/gen-word-3900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 4200 max 
words, 2000 samples - at ../dataset/gen-word-4200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 4600 max words, 2000 samples - at ../dataset/gen-word-4600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 5600 max words, 2000 samples - at ../dataset/gen-word-5600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 6100 max words, 2000 samples - at ../dataset/gen-word-6100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 4300 max words, 2000 samples - at ../dataset/gen-word-4300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 6200 max words, 2000 samples - at ../dataset/gen-word-6200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 4500 max words, 2000 samples - at ../dataset/gen-word-4500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 5200 max words, 2000 samples - at ../dataset/gen-word-5200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 4400 max words, 2000 samples - at ../dataset/gen-word-4400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 4800 max words, 2000 samples - at ../dataset/gen-word-4800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 5500 max words, 2000 samples - at ../dataset/gen-word-5500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 5100 max words, 2000 samples - at 
../dataset/gen-word-5100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 4700 max words, 2000 samples - at ../dataset/gen-word-4700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 6000 max words, 2000 samples - at ../dataset/gen-word-6000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 5000 max words, 2000 samples - at ../dataset/gen-word-5000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 4900 max words, 2000 samples - at ../dataset/gen-word-4900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 5400 max words, 2000 samples - at ../dataset/gen-word-5400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 5800 max words, 2000 samples - at ../dataset/gen-word-5800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 5300 max words, 2000 samples - at ../dataset/gen-word-5300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 5700 max words, 2000 samples - at ../dataset/gen-word-5700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 7400 max words, 2000 samples - at ../dataset/gen-word-7400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 6900 max words, 2000 samples - at ../dataset/gen-word-6900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 6300 max words, 2000 samples - at ../dataset/gen-word-6300-count.jsonl\n" + ] + }, + { 
+ "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 5900 max words, 2000 samples - at ../dataset/gen-word-5900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 7900 max words, 2000 samples - at ../dataset/gen-word-7900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 6600 max words, 2000 samples - at ../dataset/gen-word-6600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 6400 max words, 2000 samples - at ../dataset/gen-word-6400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 7200 max words, 2000 samples - at ../dataset/gen-word-7200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 6800 max words, 2000 samples - at ../dataset/gen-word-6800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 6700 max words, 2000 samples - at ../dataset/gen-word-6700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 6500 max words, 2000 samples - at ../dataset/gen-word-6500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 7800 max words, 2000 samples - at ../dataset/gen-word-7800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 7100 max words, 2000 samples - at ../dataset/gen-word-7100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 7000 max words, 2000 samples - at ../dataset/gen-word-7000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "Generated JSONL file with - 8000 max words, 2000 samples - at ../dataset/gen-word-8000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 7600 max words, 2000 samples - at ../dataset/gen-word-7600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 7500 max words, 2000 samples - at ../dataset/gen-word-7500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 7300 max words, 2000 samples - at ../dataset/gen-word-7300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated JSONL file with - 7700 max words, 2000 samples - at ../dataset/gen-word-7700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Done ##\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 2.2G\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 20K Sep 14 00:23 gen-word-10-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 2.1M Sep 14 00:23 gen-word-100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 20M Sep 14 00:23 gen-word-1000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 22M Sep 14 00:23 gen-word-1100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 23M Sep 14 00:23 gen-word-1200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 25M Sep 14 00:23 gen-word-1300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 27M Sep 14 00:23 
gen-word-1400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 25K Sep 14 00:23 gen-word-15-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 29M Sep 14 00:23 gen-word-1500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 31M Sep 14 00:23 gen-word-1600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 33M Sep 14 00:23 gen-word-1700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 35M Sep 14 00:23 gen-word-1800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 37M Sep 14 00:23 gen-word-1900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 30K Sep 14 00:23 gen-word-20-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 4.0M Sep 14 00:23 gen-word-200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 39M Sep 14 00:23 gen-word-2000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 41M Sep 14 00:23 gen-word-2100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 42M Sep 14 00:23 gen-word-2200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 44M Sep 14 00:23 gen-word-2300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 46M Sep 14 00:23 gen-word-2400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 34K 
Sep 14 00:23 gen-word-25-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 48M Sep 14 00:23 gen-word-2500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 50M Sep 14 00:23 gen-word-2600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 52M Sep 14 00:23 gen-word-2700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 54M Sep 14 00:23 gen-word-2800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 56M Sep 14 00:23 gen-word-2900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 40K Sep 14 00:23 gen-word-30-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 5.9M Sep 14 00:23 gen-word-300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 58M Sep 14 00:23 gen-word-3000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 60M Sep 14 00:23 gen-word-3100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 61M Sep 14 00:23 gen-word-3200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 63M Sep 14 00:23 gen-word-3300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 65M Sep 14 00:23 gen-word-3400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 44K Sep 14 00:23 gen-word-35-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 
root root 67M Sep 14 00:23 gen-word-3500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 69M Sep 14 00:23 gen-word-3600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 71M Sep 14 00:23 gen-word-3700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 73M Sep 14 00:23 gen-word-3800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 75M Sep 14 00:23 gen-word-3900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 49K Sep 14 00:23 gen-word-40-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 7.9M Sep 14 00:23 gen-word-400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 77M Sep 14 00:23 gen-word-4000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 79M Sep 14 00:23 gen-word-4100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 80M Sep 14 00:23 gen-word-4200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 82M Sep 14 00:23 gen-word-4300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 84M Sep 14 00:23 gen-word-4400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 54K Sep 14 00:23 gen-word-45-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 86M Sep 14 00:23 gen-word-4500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"-rw-r--r-- 1 root root 88M Sep 14 00:23 gen-word-4600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 90M Sep 14 00:23 gen-word-4700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 92M Sep 14 00:23 gen-word-4800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 94M Sep 14 00:23 gen-word-4900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 15K Sep 14 00:23 gen-word-5-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 59K Sep 14 00:23 gen-word-50-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 9.7M Sep 14 00:23 gen-word-500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 96M Sep 14 00:23 gen-word-5000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 98M Sep 14 00:23 gen-word-5100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 99M Sep 14 00:23 gen-word-5200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 101M Sep 14 00:24 gen-word-5300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 103M Sep 14 00:23 gen-word-5400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 64K Sep 14 00:23 gen-word-55-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 105M Sep 14 00:23 gen-word-5500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "-rw-r--r-- 1 root root 107M Sep 14 00:23 gen-word-5600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 109M Sep 14 00:24 gen-word-5700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 111M Sep 14 00:24 gen-word-5800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 113M Sep 14 00:24 gen-word-5900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 68K Sep 14 00:23 gen-word-60-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 12M Sep 14 00:23 gen-word-600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 115M Sep 14 00:23 gen-word-6000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 117M Sep 14 00:23 gen-word-6100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 118M Sep 14 00:23 gen-word-6200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 120M Sep 14 00:24 gen-word-6300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 122M Sep 14 00:24 gen-word-6400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 73K Sep 14 00:23 gen-word-65-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 124M Sep 14 00:24 gen-word-6500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 126M Sep 14 00:24 gen-word-6600-count.jsonl\n" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 128M Sep 14 00:24 gen-word-6700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 130M Sep 14 00:24 gen-word-6800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 132M Sep 14 00:24 gen-word-6900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 77K Sep 14 00:23 gen-word-70-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 14M Sep 14 00:23 gen-word-700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 134M Sep 14 00:24 gen-word-7000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 136M Sep 14 00:24 gen-word-7100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 137M Sep 14 00:24 gen-word-7200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 139M Sep 14 00:24 gen-word-7300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 141M Sep 14 00:24 gen-word-7400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 84K Sep 14 00:23 gen-word-75-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 143M Sep 14 00:24 gen-word-7500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 145M Sep 14 00:24 gen-word-7600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 147M Sep 14 00:24 gen-word-7700-count.jsonl\n" + ] + }, + { 
+ "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 149M Sep 14 00:24 gen-word-7800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 151M Sep 14 00:24 gen-word-7900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 89K Sep 14 00:23 gen-word-80-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 16M Sep 14 00:23 gen-word-800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 153M Sep 14 00:24 gen-word-8000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 93K Sep 14 00:23 gen-word-85-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 98K Sep 14 00:23 gen-word-90-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 18M Sep 14 00:23 gen-word-900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 104K Sep 14 00:23 gen-word-95-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 500K Sep 14 00:23 shuffle-word-10-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 567K Sep 14 00:23 shuffle-word-100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 522K Sep 14 00:23 shuffle-word-1000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 520K Sep 14 00:23 shuffle-word-1100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 523K Sep 14 00:23 
shuffle-word-1200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 522K Sep 14 00:23 shuffle-word-1300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 518K Sep 14 00:23 shuffle-word-1400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 427K Sep 14 00:23 shuffle-word-15-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 522K Sep 14 00:23 shuffle-word-1500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 522K Sep 14 00:23 shuffle-word-1600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 517K Sep 14 00:23 shuffle-word-1700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 519K Sep 14 00:23 shuffle-word-1800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 520K Sep 14 00:23 shuffle-word-1900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 381K Sep 14 00:23 shuffle-word-20-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 543K Sep 14 00:23 shuffle-word-200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 518K Sep 14 00:23 shuffle-word-2000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 521K Sep 14 00:23 shuffle-word-2100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 518K Sep 14 00:23 shuffle-word-2200-count.jsonl\n" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 522K Sep 14 00:23 shuffle-word-2300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 520K Sep 14 00:23 shuffle-word-2400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 358K Sep 14 00:23 shuffle-word-25-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 520K Sep 14 00:23 shuffle-word-2500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 513K Sep 14 00:23 shuffle-word-2600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 512K Sep 14 00:23 shuffle-word-2700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-2800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-2900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 338K Sep 14 00:23 shuffle-word-30-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 530K Sep 14 00:23 shuffle-word-300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-3000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-3100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-3200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K 
Sep 14 00:23 shuffle-word-3300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-3400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 320K Sep 14 00:23 shuffle-word-35-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-3500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-3600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-3700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-3800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-3900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 324K Sep 14 00:23 shuffle-word-40-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 530K Sep 14 00:23 shuffle-word-400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-4300-count.jsonl\n" + ] + }, + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 313K Sep 14 00:23 shuffle-word-45-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-4900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 791K Sep 14 00:23 shuffle-word-5-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 299K Sep 14 00:23 shuffle-word-50-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 524K Sep 14 00:23 shuffle-word-500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-5000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-5100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-5200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 
1 root root 509K Sep 14 00:23 shuffle-word-5300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-5400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 302K Sep 14 00:23 shuffle-word-55-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-5500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-5600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-5700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-5800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-5900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 298K Sep 14 00:23 shuffle-word-60-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 525K Sep 14 00:23 shuffle-word-600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-6000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-6100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-6200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-6300-count.jsonl\n" + ] 
+ }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-6400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 299K Sep 14 00:23 shuffle-word-65-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-6500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-6600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-6700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-6800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-6900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 297K Sep 14 00:23 shuffle-word-70-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 526K Sep 14 00:23 shuffle-word-700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-7000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-7100-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-7200-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-7300-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ 
+ "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-7400-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 293K Sep 14 00:23 shuffle-word-75-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-7500-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-7600-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-7700-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-7800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 508K Sep 14 00:23 shuffle-word-7900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 289K Sep 14 00:23 shuffle-word-80-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 525K Sep 14 00:23 shuffle-word-800-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 509K Sep 14 00:23 shuffle-word-8000-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 288K Sep 14 00:23 shuffle-word-85-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 284K Sep 14 00:23 shuffle-word-90-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 522K Sep 14 00:23 shuffle-word-900-count.jsonl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-rw-r--r-- 1 root root 284K Sep 14 00:23 
shuffle-word-95-count.jsonl\n" + ] + } + ], + "source": [ + "%%script bash\n", + "\n", + "########################################\n", + "# Generate the required jsonl dataset\n", + "########################################\n", + "\n", + "# Go to config dir\n", + "cd \"../\"\n", + "\n", + "# Reset the dataset dir\n", + "mkdir -p ../dataset\n", + "rm -rf ../dataset/*.jsonl\n", + "\n", + "# Generate the various datasets\n", + "echo \"## Generating word reptition dataset ##\"\n", + "\n", + "#\n", + "# We reduce the training set for < 50 words - and shift the focus upwards\n", + "# (aka 50-100 token * 2 : ~100 - 250 token ctx len)\n", + "#\n", + "# NOTE: this range stops at 95 on purpose. The 100-word files are produced by\n", + "# the second loop below; every job is started in the background, so letting\n", + "# both loops include 100 made two processes write the same\n", + "# gen-word-100-count.jsonl / shuffle-word-100-count.jsonl at the same time,\n", + "# which corrupted the jsonl and broke dataset loading in the training cell.\n", + "#\n", + "for i in {5..95..5} \n", + "do\n", + " python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 100 & \n", + " python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 10 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 100 - 8000 words dataset (step 100)\n", + "# \n", + "for i in {100..8000..100} \n", + "do\n", + " python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 2000 & \n", + " python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 20 & \n", + "done\n", + "\n", + "# Block until every background generator job has finished\n", + "wait\n", + "echo \"## Done ##\"\n", + "\n", + "ls -lh ../dataset/" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "701b6753", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-14T00:24:02.712544Z", + "iopub.status.busy": "2023-09-14T00:24:02.712008Z", + "iopub.status.idle": "2023-09-14T00:24:35.560189Z", + "shell.execute_reply": "2023-09-14T00:24:35.559394Z" + }, + "papermill": { + "duration": 32.883629, + "end_time": "2023-09-14T00:24:35.562107", + "exception": false, + "start_time": "2023-09-14T00:24:02.678478", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "[2023-09-14 00:24:05,633] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L6-D2560-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L6-D2560-E0_1-mem-ctx-8k/', '--model.lr_init=4e-4', '--model.lr_final=2e-4', '--data.max_token_size=8192', '--data.sort_by_length=True', '--model.ctx_len=4096', '--model.bptt_learning_range=2', '--model.load_model=../model/v5r3-L6-D2560-E0_1-mem-ctx-512.pth'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L6-D2560-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L6-D2560-E0_1-mem-ctx-8k/', '--model.lr_init=4e-4', '--model.lr_final=2e-4', '--data.max_token_size=8192', '--data.sort_by_length=True', '--model.ctx_len=4096', '--model.bptt_learning_range=2', '--model.load_model=../model/v5r3-L6-D2560-E0_1-mem-ctx-512.pth'].\r\n", + " rank_zero_warn(\r\n" + ] + }, + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 4194784656\r\n", + " rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n", + "Global seed set to 4194784656\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.10\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20230914_002408-wrr91tv7\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mv5r3-L6-D2560-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: πŸš€ View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/wrr91tv7\u001b[0m\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/lightning/fabric/connector.py:554: UserWarning: bf16 is supported for historical reasons but its usage is discouraged. 
Please set your precision to bf16-mixed instead!\r\n", + " rank_zero_warn(\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GPU available: True (cuda), used: True\r\n", + "TPU available: False, using: 0 TPU cores\r\n", + "IPU available: False, using: 0 IPUs\r\n", + "HPU available: False, using: 0 HPUs\r\n", + "\r\n", + "\r\n", + "[RWKV.Trainer] Applying 'target_batch_size' with the following:\r\n", + " - target_batch_size: 256\r\n", + " - num_nodes: 1\r\n", + " - num_devices: 1\r\n", + " - accumulate_grad_batches: 256\r\n", + " - effective_batch_size: 256\r\n", + "\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Resolving data files: 0%| | 0/198 [00:00: JSON parse error: Missing a comma or '}' after an object member. in row 233\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 136606 examples [00:04, 35027.49 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 140455 examples [00:04, 20618.59 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 143531 examples [00:05, 21445.75 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 146681 examples [00:05, 22082.33 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 149694 examples [00:05, 22730.77 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 152975 examples [00:05, 22489.21 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 155530 examples [00:05, 20086.23 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 158092 
examples [00:05, 15943.50 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 160242 examples [00:06, 15481.26 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 162322 examples [00:06, 14098.18 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 164028 examples [00:06, 13989.96 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 165725 examples [00:06, 13009.40 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 167287 examples [00:06, 12674.13 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 169262 examples [00:06, 13937.10 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 170803 examples [00:06, 14075.62 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 172262 examples [00:06, 13051.73 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 173727 examples [00:07, 12319.32 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 175213 examples [00:07, 12084.54 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 176512 examples [00:07, 11651.88 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 177716 examples [00:07, 11150.69 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 178902 examples 
[00:07, 11209.52 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 180206 examples [00:07, 11234.12 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 181684 examples [00:07, 11864.15 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 182992 examples [00:07, 11920.17 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Generating train split: 183622 examples [00:08, 22556.31 examples/s]\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "multiprocess.pool.RemoteTraceback: \r\n", + "\"\"\"\r\n", + "Traceback (most recent call last):\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/packaged_modules/json/json.py\", line 144, in _generate_tables\r\n", + " dataset = json.load(f)\r\n", + " File \"/usr/lib/python3.10/json/__init__.py\", line 293, in load\r\n", + " return loads(fp.read(),\r\n", + " File \"/usr/lib/python3.10/json/__init__.py\", line 346, in loads\r\n", + " return _default_decoder.decode(s)\r\n", + " File \"/usr/lib/python3.10/json/decoder.py\", line 340, in decode\r\n", + " raise JSONDecodeError(\"Extra data\", s, end)\r\n", + "json.decoder.JSONDecodeError: Extra data: line 2 column 1 (char 1121)\r\n", + "\r\n", + "During handling of the above exception, another exception occurred:\r\n", + "\r\n", + "Traceback (most recent call last):\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 1925, in _prepare_split_single\r\n", + " for _, table in generator:\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/packaged_modules/json/json.py\", line 147, in _generate_tables\r\n", + " raise e\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/packaged_modules/json/json.py\", line 121, in _generate_tables\r\n", + " 
pa_table = paj.read_json(\r\n", + " File \"pyarrow/_json.pyx\", line 258, in pyarrow._json.read_json\r\n", + " File \"pyarrow/error.pxi\", line 144, in pyarrow.lib.pyarrow_internal_check_status\r\n", + " File \"pyarrow/error.pxi\", line 100, in pyarrow.lib.check_status\r\n", + "pyarrow.lib.ArrowInvalid: JSON parse error: Missing a comma or '}' after an object member. in row 233\r\n", + "\r\n", + "The above exception was the direct cause of the following exception:\r\n", + "\r\n", + "Traceback (most recent call last):\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/multiprocess/pool.py\", line 125, in worker\r\n", + " result = (True, func(*args, **kwds))\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py\", line 1347, in _write_generator_to_queue\r\n", + " for i, result in enumerate(func(**kwargs)):\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 1958, in _prepare_split_single\r\n", + " raise DatasetGenerationError(\"An error occurred while generating the dataset\") from e\r\n", + "datasets.builder.DatasetGenerationError: An error occurred while generating the dataset\r\n", + "\"\"\"\r\n", + "\r\n", + "The above exception was the direct cause of the following exception:\r\n", + "\r\n", + "Traceback (most recent call last):\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 258, in \r\n", + " cli_main()\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 233, in cli_main\r\n", + " LightningCLI(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 353, in __init__\r\n", + " self._run_subcommand(self.subcommand)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 642, in _run_subcommand\r\n", + " fn(**fn_kwargs)\r\n", + " File 
\"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 529, in fit\r\n", + " call._call_and_handle_interrupt(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 41, in _call_and_handle_interrupt\r\n", + " return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/strategies/launchers/subprocess_script.py\", line 91, in launch\r\n", + " return function(*args, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 568, in _fit_impl\r\n", + " self._run(model, ckpt_path=ckpt_path)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 925, in _run\r\n", + " self._data_connector.prepare_data()\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py\", line 94, in prepare_data\r\n", + " call._call_lightning_datamodule_hook(trainer, \"prepare_data\")\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 164, in _call_lightning_datamodule_hook\r\n", + " return fn(*args, **kwargs)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 549, in prepare_data\r\n", + " prepare_data_static(**self._init_locals)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 101, in prepare_data_static\r\n", + " src_dataset = load_dataset(**load_dataset_params)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 2136, in load_dataset\r\n", + " builder_instance.download_and_prepare(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 954, in download_and_prepare\r\n", + " self._download_and_prepare(\r\n", + " File 
\"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 1049, in _download_and_prepare\r\n", + " self._prepare_split(split_generator, **prepare_split_kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/builder.py\", line 1842, in _prepare_split\r\n", + " for job_id, done, content in iflatmap_unordered(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py\", line 1387, in iflatmap_unordered\r\n", + " [async_result.get(timeout=0.05) for async_result in async_results]\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py\", line 1387, in \r\n", + " [async_result.get(timeout=0.05) for async_result in async_results]\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/multiprocess/pool.py\", line 774, in get\r\n", + " raise self._value\r\n", + "datasets.builder.DatasetGenerationError: An error occurred while generating the dataset\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... 
\u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: πŸš€ View run \u001b[33mv5r3-L6-D2560-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/wrr91tv7\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚑ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjkzMjg5ODA3/version_details/v53\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230914_002408-wrr91tv7/logs\u001b[0m\r\n" + ] + } + ], + "source": [ + "# Start the finetune model training\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{CONFIG_DIR}/config-mem-template.yaml\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Tune ctx-8k (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-mem-ctx-8k/\" \\\n", + " --model.lr_init=4e-4 \\\n", + " --model.lr_final=2e-4 \\\n", + " --data.max_token_size=8192 \\\n", + " --data.sort_by_length=True \\\n", + " --model.ctx_len=4096 \\\n", + " --model.bptt_learning_range=2 \\\n", + " --model.load_model=\"../model/{FILENAME_PREFIX}-mem-ctx-512.pth\"" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "500c7607", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-14T00:24:35.637916Z", + "iopub.status.busy": "2023-09-14T00:24:35.637654Z", + "iopub.status.idle": "2023-09-14T00:24:38.174650Z", + "shell.execute_reply": 
"2023-09-14T00:24:38.173904Z" + }, + "papermill": { + "duration": 2.574032, + "end_time": "2023-09-14T00:24:38.176339", + "exception": false, + "start_time": "2023-09-14T00:24:35.602307", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-09-14 00:24:37,304] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in \r\n", + " convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n", + " state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n", + " raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n", + "ValueError: Unable to find 'latest' file at ../checkpoint/v5r3-L6-D2560-E0_1-mem-ctx-8k/last.ckpt/latest\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ls: cannot access '../model/v5r3-L6-D2560-E0_1-mem-ctx-8k.pth': No such file or directory\r\n" + ] + } + ], + "source": [ + "# Lets export the model from the checkpoint\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 export_checkpoint.py \\\n", + " \"../checkpoint/{FILENAME_PREFIX}-mem-ctx-8k/last.ckpt\" \\\n", + " \"../model/{FILENAME_PREFIX}-mem-ctx-8k.pth\" \"bf16\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-mem-ctx-8k.pth\"" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + 
"id": "a169a91a", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-14T00:24:38.251360Z", + "iopub.status.busy": "2023-09-14T00:24:38.251096Z", + "iopub.status.idle": "2023-09-14T00:24:38.489940Z", + "shell.execute_reply": "2023-09-14T00:24:38.489202Z" + }, + "papermill": { + "duration": 0.276215, + "end_time": "2023-09-14T00:24:38.491553", + "exception": false, + "start_time": "2023-09-14T00:24:38.215338", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/../memory_script/eval_v5_memory_guided.py': [Errno 2] No such file or directory\r\n" + ] + } + ], + "source": [ + "# Lets do a quick memory test\n", + "# NOTE: this cell runs from the notebook's own directory, so memory_script is\n", + "# two levels up (the dataset cell reaches it as ../memory_script only after `cd ../`).\n", + "!python3 ../../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-8k.pth\"" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "57ad36b1", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-14T00:24:38.567093Z", + "iopub.status.busy": "2023-09-14T00:24:38.566558Z", + "iopub.status.idle": "2023-09-14T00:24:38.802491Z", + "shell.execute_reply": "2023-09-14T00:24:38.801579Z" + }, + "papermill": { + "duration": 0.273683, + "end_time": "2023-09-14T00:24:38.804465", + "exception": false, + "start_time": "2023-09-14T00:24:38.530782", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/../memory_script/eval_v5_memory_guided.py': [Errno 2] No such file or directory\r\n" + ] + } + ], + "source": [ + "# Same eval script with extra positional arguments (\"none\" 1000 4000);\n", + "# their meaning is defined by eval_v5_memory_guided.py - TODO confirm.\n", + "!python3 ../../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-8k.pth\" \"none\" 1000 4000" + ] + } + ], + 
"metadata": { + "kernelspec": { + "display_name": "python3 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "papermill": { + "default_parameters": {}, + "duration": 171.070576, + "end_time": "2023-09-14T00:24:38.962638", + "environment_variables": {}, + "exception": null, + "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb", + "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb", + "parameters": {}, + "start_time": "2023-09-14T00:21:47.892062", + "version": "2.4.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file