diff --git "a/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E1e-1-ctx4k-part2.ipynb" "b/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E1e-1-ctx4k-part2.ipynb" deleted file mode 100644--- "a/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E1e-1-ctx4k-part2.ipynb" +++ /dev/null @@ -1,214993 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "53a291e4", - "metadata": { - "papermill": { - "duration": 0.003961, - "end_time": "2023-08-30T08:22:22.778095", - "exception": false, - "start_time": "2023-08-30T08:22:22.774134", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "# RWKV v5 / embedding init-range 1e-01 / 4k\n", - "\n", - "- 96 layers\n", - "- 1024 embedding size\n", - "\n", - "Going through the modified memory training for v5 models, across various initial embedding model weights\n", - "\n", - "**Note:** This project assumes you have the rwkv-infctx conda env setup" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "91f7f260", - "metadata": { - "papermill": { - "duration": 0.002995, - "end_time": "2023-08-30T08:22:22.784052", - "exception": false, - "start_time": "2023-08-30T08:22:22.781057", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "# Basic Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "b39e9dd1", - "metadata": { - "execution": { - "iopub.execute_input": "2023-08-30T08:22:22.791466Z", - "iopub.status.busy": "2023-08-30T08:22:22.790587Z", - "iopub.status.idle": "2023-08-30T08:22:23.503862Z", - "shell.execute_reply": "2023-08-30T08:22:23.502852Z" - }, - "papermill": { - "duration": 0.71881, - "end_time": "2023-08-30T08:22:23.505801", - "exception": false, - "start_time": "2023-08-30T08:22:22.786991", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# First lets setup the various directories, and init the model\n", - "!mkdir -p ../../../../model/\n", - "!mkdir -p ../../../../datapath/\n", - "!mkdir -p ../../../../checkpoint/" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "daeb5b3a", - "metadata": { - "execution": { - "iopub.execute_input": "2023-08-30T08:22:23.512674Z", - "iopub.status.busy": "2023-08-30T08:22:23.512484Z", - "iopub.status.idle": "2023-08-30T08:22:26.376630Z", - "shell.execute_reply": "2023-08-30T08:22:26.376082Z" - }, - "papermill": { - "duration": 2.869674, - "end_time": "2023-08-30T08:22:26.378456", - "exception": false, - "start_time": "2023-08-30T08:22:23.508782", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\r\n", - "\u001b[0m" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\r\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3.11 -m pip install --upgrade pip\u001b[0m\r\n" - ] - } - ], - "source": [ - "# Additional dependencies for eval stuff\n", - "!pip install -q aiocsv aiofiles" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "0643f038", - "metadata": { - "execution": { - "iopub.execute_input": "2023-08-30T08:22:26.386156Z", - "iopub.status.busy": "2023-08-30T08:22:26.385827Z", - "iopub.status.idle": "2023-08-30T08:22:26.392075Z", - "shell.execute_reply": "2023-08-30T08:22:26.391598Z" - }, - "papermill": { - "duration": 0.011497, - "end_time": "2023-08-30T08:22:26.393198", - "exception": false, - "start_time": "2023-08-30T08:22:26.381701", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "DEEPSPEED_STRAT: deepspeed_stage_1\n", - "ENABLE_WANDB: True\n", - "GPU_DEVICES: auto\n", - "NOTEBOOK_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-memory\n", - "INFERENCE_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", - "TRAINER_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", - "PROJECT_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer\n" - ] - } - ], - "source": [ - "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n", - "GPU_DEVICES=\"auto\"\n", - "ENABLE_WANDB=True\n", - "\n", - "# Layer count and embed dim to start with\n", - "LAYER_COUNT=96\n", - "EMBED_DIM=1024\n", - "\n", - "# Wavnet compatibility?\n", - "RWKV_WAVENET_LAYERS=0\n", - "\n", - "EMBED_SCALE=0.1\n", - "EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(\".\", \"_\")\n", - "\n", - "WANDB_PREFIX=f\"v5-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE}\"\n", - "FILENAME_PREFIX=f\"v5-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE_LABEL}\"\n", - "\n", - "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n", - "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", - "print(\"GPU_DEVICES:\", GPU_DEVICES)\n", - "\n", - "if ENABLE_WANDB:\n", - " WANDB_MODE=\"online\"\n", - "else:\n", - " WANDB_MODE=\"disabled\"\n", - "\n", - "# Computing the notebook, and various paths\n", - "import os\n", - "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", - "PROJECT_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../../../../\"))\n", - "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", - "INFERENCE_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", - "\n", - "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", - "print(\"INFERENCE_DIR:\", INFERENCE_DIR)\n", - "print(\"TRAINER_DIR:\", TRAINER_DIR)\n", - "print(\"PROJECT_DIR:\", PROJECT_DIR)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cf51c273", - "metadata": { - "execution": { - "iopub.execute_input": "2023-08-30T08:22:26.400113Z", - "iopub.status.busy": "2023-08-30T08:22:26.399960Z", - "iopub.status.idle": "2023-08-30T08:23:49.811179Z", - "shell.execute_reply": "2023-08-30T08:23:49.810299Z" - }, - "papermill": { - "duration": 83.417299, - "end_time": "2023-08-30T08:23:49.813583", - "exception": false, - "start_time": "2023-08-30T08:22:26.396284", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2023-08-30 08:22:26-- https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E0_1-enwiki-4k.pth\r\n", - "Resolving huggingface.co (huggingface.co)... " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "52.85.242.8, 52.85.242.16, 52.85.242.35, ...\r\n", - "Connecting to huggingface.co (huggingface.co)|52.85.242.8|:443... connected.\r\n", - "HTTP request sent, awaiting response... " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "302 Found\r\n", - "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/cf564bd00443254a3460507c8f020d65f3f7de6598329ad60d7ceb74fdf94786?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L96-D1024-E0_1-enwiki-4k.pth%3B+filename%3D%22v5-L96-D1024-E0_1-enwiki-4k.pth%22%3B&Expires=1693642946&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5MzY0Mjk0Nn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkL2NmNTY0YmQwMDQ0MzI1NGEzNDYwNTA3YzhmMDIwZDY1ZjNmN2RlNjU5ODMyOWFkNjBkN2NlYjc0ZmRmOTQ3ODY%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=kKM4zUfc%7ET9Whrapp1pQC9W3KFsBRyWgug8bDuEWC-Yjpj1Bp2Wwkg7wT0PQyr8N7TdrRIf5tcoa85bZf8V2w5HMuc8Bz1lTKwQrP%7E16hZ51tlDwV4nmyxRzPk1iGTSYCAQxaLsj%7E2FfeWTMToqNVeugFtPRcMRzjQa-alBYdkey20U7QDwqziARnLp461lidSjT9y-KDkDd6saWeq7auhmZqytSMGb47Xrmg9pWg1j2Ay%7EhvNGBqIZYDp%7E-2UPG8dgqu1bKNaicfLObYOaqDzLoQ4%7EFKF48KGr0Vy1oKC5kVG14PMT2rR1w%7E8UCMFA6UXkUqZJnSI0ol7vp32Od4Q__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n", - "--2023-08-30 08:22:26-- https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/cf564bd00443254a3460507c8f020d65f3f7de6598329ad60d7ceb74fdf94786?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L96-D1024-E0_1-enwiki-4k.pth%3B+filename%3D%22v5-L96-D1024-E0_1-enwiki-4k.pth%22%3B&Expires=1693642946&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5MzY0Mjk0Nn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkL2NmNTY0YmQwMDQ0MzI1NGEzNDYwNTA3YzhmMDIwZDY1ZjNmN2RlNjU5ODMyOWFkNjBkN2NlYjc0ZmRmOTQ3ODY%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=kKM4zUfc%7ET9Whrapp1pQC9W3KFsBRyWgug8bDuEWC-Yjpj1Bp2Wwkg7wT0PQyr8N7TdrRIf5tcoa85bZf8V2w5HMuc8Bz1lTKwQrP%7E16hZ51tlDwV4nmyxRzPk1iGTSYCAQxaLsj%7E2FfeWTMToqNVeugFtPRcMRzjQa-alBYdkey20U7QDwqziARnLp461lidSjT9y-KDkDd6saWeq7auhmZqytSMGb47Xrmg9pWg1j2Ay%7EhvNGBqIZYDp%7E-2UPG8dgqu1bKNaicfLObYOaqDzLoQ4%7EFKF48KGr0Vy1oKC5kVG14PMT2rR1w%7E8UCMFA6UXkUqZJnSI0ol7vp32Od4Q__&Key-Pair-Id=KVTP0A1DKRTAX\r\n", - "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "108.157.214.46, 108.157.214.7, 108.157.214.31, ...\r\n", - "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|108.157.214.46|:443... connected.\r\n", - "HTTP request sent, awaiting response... " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "200 OK\r\n", - "Length: 2825974771 (2.6G) [binary/octet-stream]\r\n", - "Saving to: ‘v5-L96-D1024-E0_1-enwiki-4k.pth’\r\n", - "\r\n", - "\r", - " v5-L96-D1 0%[ ] 0 --.-KB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D10 0%[ ] 151.27K 557KB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D102 0%[ ] 1.09M 2.31MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024 0%[ ] 3.75M 5.59MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024- 0%[ ] 11.14M 12.8MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E 0%[ ] 16.47M 15.0MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0 0%[ ] 23.64M 17.9MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0_ 1%[ ] 32.31M 21.2MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0_1 1%[ ] 40.86M 23.3MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0_1- 1%[ ] 49.29M 25.2MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "v5-L96-D1024-E0_1-e 2%[ ] 56.71M 26.2MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "5-L96-D1024-E0_1-en 2%[ ] 64.90M 27.3MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-L96-D1024-E0_1-enw 2%[ ] 75.32M 28.9MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "L96-D1024-E0_1-enwi 3%[ ] 83.40M 29.7MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "96-D1024-E0_1-enwik 3%[ ] 91.23M 29.9MB/s eta 87s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "6-D1024-E0_1-enwiki 3%[ ] 99.36M 30.4MB/s eta 87s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-D1024-E0_1-enwiki- 3%[ ] 106.59M 32.2MB/s eta 87s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "D1024-E0_1-enwiki-4 4%[ ] 114.45M 34.4MB/s eta 87s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1024-E0_1-enwiki-4k 4%[ ] 123.25M 36.5MB/s eta 87s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "024-E0_1-enwiki-4k. 4%[ ] 132.82M 37.7MB/s eta 79s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "24-E0_1-enwiki-4k.p 5%[> ] 140.60M 38.3MB/s eta 79s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "4-E0_1-enwiki-4k.pt 5%[> ] 146.04M 38.2MB/s eta 79s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-E0_1-enwiki-4k.pth 5%[> ] 154.66M 38.4MB/s eta 79s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "E0_1-enwiki-4k.pth 6%[> ] 163.83M 39.1MB/s eta 79s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "0_1-enwiki-4k.pth 6%[> ] 172.42M 38.8MB/s eta 75s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "_1-enwiki-4k.pth 6%[> ] 180.67M 38.4MB/s eta 75s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1-enwiki-4k.pth 6%[> ] 188.49M 38.5MB/s eta 75s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-enwiki-4k.pth 7%[> ] 196.08M 38.1MB/s eta 75s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "enwiki-4k.pth 7%[> ] 203.02M 37.9MB/s eta 75s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "nwiki-4k.pth 7%[> ] 209.95M 37.8MB/s eta 73s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "wiki-4k.pth 8%[> ] 217.47M 37.1MB/s eta 73s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "iki-4k.pth 8%[> ] 223.88M 37.3MB/s eta 73s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "ki-4k.pth 8%[> ] 230.88M 37.2MB/s eta 73s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "i-4k.pth 8%[> ] 237.73M 36.7MB/s eta 73s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-4k.pth 9%[> ] 245.03M 35.8MB/s eta 73s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "4k.pth 9%[> ] 251.82M 35.2MB/s eta 73s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "k.pth 9%[> ] 259.41M 35.2MB/s eta 73s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - ".pth 9%[> ] 267.28M 35.2MB/s eta 73s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "pth 10%[=> ] 274.27M 34.6MB/s eta 73s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "th 10%[=> ] 279.45M 34.4MB/s eta 72s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "h 10%[=> ] 288.63M 33.5MB/s eta 72s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " 10%[=> ] 295.89M 34.1MB/s eta 72s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v 11%[=> ] 303.42M 33.7MB/s eta 72s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5 11%[=> ] 310.86M 33.7MB/s eta 72s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5- 11%[=> ] 317.24M 33.4MB/s eta 71s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L 12%[=> ] 325.38M 33.9MB/s eta 71s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L9 12%[=> ] 332.83M 34.2MB/s eta 71s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96 12%[=> ] 340.06M 34.2MB/s eta 71s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96- 12%[=> ] 347.24M 34.2MB/s eta 71s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D 13%[=> ] 353.79M 33.8MB/s eta 69s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1 13%[=> ] 360.87M 34.2MB/s eta 69s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D10 13%[=> ] 366.21M 33.4MB/s eta 69s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D102 13%[=> ] 375.00M 34.3MB/s eta 69s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024 14%[=> ] 384.13M 34.6MB/s eta 69s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024- 14%[=> ] 393.08M 35.7MB/s eta 67s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E 14%[=> ] 400.97M 35.6MB/s eta 67s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0 15%[==> ] 408.55M 35.8MB/s eta 67s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0_ 15%[==> ] 414.54M 35.3MB/s eta 67s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0_1 15%[==> ] 424.25M 35.8MB/s eta 67s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0_1- 16%[==> ] 431.66M 35.8MB/s eta 66s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "v5-L96-D1024-E0_1-e 16%[==> ] 438.61M 35.9MB/s eta 66s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "5-L96-D1024-E0_1-en 16%[==> ] 447.61M 36.5MB/s eta 66s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-L96-D1024-E0_1-enw 16%[==> ] 454.61M 36.0MB/s eta 66s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "L96-D1024-E0_1-enwi 17%[==> ] 462.03M 36.4MB/s eta 66s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "96-D1024-E0_1-enwik 17%[==> ] 469.70M 36.4MB/s eta 64s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "6-D1024-E0_1-enwiki 17%[==> ] 476.51M 36.7MB/s eta 64s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-D1024-E0_1-enwiki- 18%[==> ] 486.11M 37.0MB/s eta 64s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "D1024-E0_1-enwiki-4 18%[==> ] 493.39M 37.2MB/s eta 64s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1024-E0_1-enwiki-4k 18%[==> ] 499.64M 37.3MB/s eta 64s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "024-E0_1-enwiki-4k. 18%[==> ] 506.92M 36.5MB/s eta 63s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "24-E0_1-enwiki-4k.p 19%[==> ] 512.35M 35.9MB/s eta 63s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "4-E0_1-enwiki-4k.pt 19%[==> ] 519.88M 35.8MB/s eta 63s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-E0_1-enwiki-4k.pth 19%[==> ] 525.89M 35.6MB/s eta 63s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "E0_1-enwiki-4k.pth 19%[==> ] 531.25M 34.7MB/s eta 63s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "0_1-enwiki-4k.pth 19%[==> ] 536.98M 34.0MB/s eta 63s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "_1-enwiki-4k.pth 20%[===> ] 544.22M 33.8MB/s eta 63s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1-enwiki-4k.pth 20%[===> ] 549.55M 32.9MB/s eta 63s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-enwiki-4k.pth 20%[===> ] 555.91M 32.5MB/s eta 63s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "enwiki-4k.pth 20%[===> ] 562.55M 32.1MB/s eta 63s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "nwiki-4k.pth 21%[===> ] 567.99M 31.8MB/s eta 63s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "wiki-4k.pth 21%[===> ] 574.75M 31.5MB/s eta 63s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "iki-4k.pth 21%[===> ] 581.53M 31.0MB/s eta 63s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "ki-4k.pth 21%[===> ] 587.07M 30.2MB/s eta 63s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "i-4k.pth 22%[===> ] 593.86M 30.1MB/s eta 63s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-4k.pth 22%[===> ] 599.73M 29.9MB/s eta 62s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "4k.pth 22%[===> ] 606.64M 29.9MB/s eta 62s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "k.pth 22%[===> ] 612.36M 29.4MB/s eta 62s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - ".pth 22%[===> ] 618.79M 29.7MB/s eta 62s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "pth 23%[===> ] 626.13M 30.1MB/s eta 62s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "th 23%[===> ] 631.06M 29.9MB/s eta 61s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "h 23%[===> ] 638.19M 30.2MB/s eta 61s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " 23%[===> ] 644.61M 30.5MB/s eta 61s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v 24%[===> ] 650.94M 30.6MB/s eta 61s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5 24%[===> ] 658.44M 31.0MB/s eta 61s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5- 24%[===> ] 664.18M 31.1MB/s eta 61s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L 24%[===> ] 670.22M 31.5MB/s eta 61s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L9 25%[====> ] 676.54M 31.0MB/s eta 61s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96 25%[====> ] 684.53M 31.0MB/s eta 61s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96- 25%[====> ] 691.30M 31.5MB/s eta 61s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D 25%[====> ] 698.07M 31.7MB/s eta 60s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1 26%[====> ] 701.51M 30.5MB/s eta 60s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D10 26%[====> ] 707.91M 30.8MB/s eta 60s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D102 26%[====> ] 714.41M 30.5MB/s eta 60s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024 26%[====> ] 719.98M 30.6MB/s eta 60s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024- 26%[====> ] 725.08M 29.6MB/s eta 60s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E 27%[====> ] 730.89M 29.2MB/s eta 60s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0 27%[====> ] 735.60M 28.8MB/s eta 60s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0_ 27%[====> ] 740.57M 27.9MB/s eta 60s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0_1 27%[====> ] 746.39M 28.0MB/s eta 60s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0_1- 27%[====> ] 751.94M 27.4MB/s eta 60s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "v5-L96-D1024-E0_1-e 28%[====> ] 757.82M 27.2MB/s eta 60s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "5-L96-D1024-E0_1-en 28%[====> ] 762.14M 26.7MB/s eta 60s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-L96-D1024-E0_1-enw 28%[====> ] 768.16M 26.7MB/s eta 60s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "L96-D1024-E0_1-enwi 28%[====> ] 772.89M 26.0MB/s eta 60s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "96-D1024-E0_1-enwik 28%[====> ] 778.66M 25.9MB/s eta 59s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "6-D1024-E0_1-enwiki 29%[====> ] 784.75M 25.7MB/s eta 59s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-D1024-E0_1-enwiki- 29%[====> ] 789.63M 25.2MB/s eta 59s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "D1024-E0_1-enwiki-4 29%[====> ] 795.25M 26.0MB/s eta 59s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1024-E0_1-enwiki-4k 29%[====> ] 801.66M 25.7MB/s eta 59s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "024-E0_1-enwiki-4k. 29%[====> ] 806.66M 25.4MB/s eta 59s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "24-E0_1-enwiki-4k.p 30%[=====> ] 812.05M 25.7MB/s eta 59s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "4-E0_1-enwiki-4k.pt 30%[=====> ] 818.36M 26.1MB/s eta 59s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-E0_1-enwiki-4k.pth 30%[=====> ] 823.80M 26.1MB/s eta 59s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "E0_1-enwiki-4k.pth 30%[=====> ] 828.85M 26.3MB/s eta 59s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "0_1-enwiki-4k.pth 30%[=====> ] 833.92M 26.3MB/s eta 58s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "_1-enwiki-4k.pth 31%[=====> ] 839.25M 26.3MB/s eta 58s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1-enwiki-4k.pth 31%[=====> ] 846.21M 26.9MB/s eta 58s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-enwiki-4k.pth 31%[=====> ] 851.33M 26.9MB/s eta 58s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "enwiki-4k.pth 31%[=====> ] 856.52M 26.9MB/s eta 58s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "nwiki-4k.pth 32%[=====> ] 862.83M 27.2MB/s eta 58s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "wiki-4k.pth 32%[=====> ] 868.80M 27.2MB/s eta 58s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "iki-4k.pth 32%[=====> ] 874.02M 27.4MB/s eta 58s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "ki-4k.pth 32%[=====> ] 879.14M 27.4MB/s eta 58s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "i-4k.pth 32%[=====> ] 884.40M 27.3MB/s eta 58s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-4k.pth 33%[=====> ] 891.00M 27.4MB/s eta 57s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "4k.pth 33%[=====> ] 896.74M 27.8MB/s eta 57s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "k.pth 33%[=====> ] 901.97M 27.6MB/s eta 57s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - ".pth 33%[=====> ] 907.16M 27.7MB/s eta 57s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "pth 33%[=====> ] 912.58M 27.5MB/s eta 57s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "th 34%[=====> ] 917.94M 27.6MB/s eta 57s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "h 34%[=====> ] 924.85M 27.9MB/s eta 57s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " 34%[=====> ] 930.05M 27.8MB/s eta 57s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v 34%[=====> ] 935.25M 27.8MB/s eta 57s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5 34%[=====> ] 940.94M 27.5MB/s eta 57s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5- 35%[======> ] 947.83M 28.1MB/s eta 56s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L 35%[======> ] 953.02M 27.7MB/s eta 56s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L9 35%[======> ] 958.46M 27.7MB/s eta 56s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96 35%[======> ] 964.78M 27.9MB/s eta 56s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96- 35%[======> ] 970.06M 28.2MB/s eta 56s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D 36%[======> ] 975.86M 28.0MB/s eta 55s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1 36%[======> ] 975.93M 26.1MB/s eta 55s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D10 36%[======> ] 988.02M 28.2MB/s eta 55s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D102 36%[======> ] 993.63M 28.2MB/s eta 55s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024 37%[======> ] 998.91M 28.1MB/s eta 55s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024- 37%[======> ] 1004M 27.9MB/s eta 54s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E 37%[======> ] 1010M 27.8MB/s eta 54s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0 37%[======> ] 1014M 27.5MB/s eta 54s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0_ 37%[======> ] 1020M 27.4MB/s eta 54s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0_1 37%[======> ] 1024M 27.0MB/s eta 54s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0_1- 38%[======> ] 1.00G 26.4MB/s eta 54s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "v5-L96-D1024-E0_1-e 38%[======> ] 1.01G 26.2MB/s eta 54s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "5-L96-D1024-E0_1-en 38%[======> ] 1.01G 25.7MB/s eta 54s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-L96-D1024-E0_1-enw 38%[======> ] 1.02G 25.3MB/s eta 54s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "L96-D1024-E0_1-enwi 38%[======> ] 1.02G 24.7MB/s eta 54s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "96-D1024-E0_1-enwik 38%[======> ] 1.03G 24.5MB/s eta 54s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "6-D1024-E0_1-enwiki 39%[======> ] 1.03G 24.8MB/s eta 54s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-D1024-E0_1-enwiki- 39%[======> ] 1.03G 23.9MB/s eta 54s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "D1024-E0_1-enwiki-4 39%[======> ] 1.04G 23.6MB/s eta 54s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1024-E0_1-enwiki-4k 39%[======> ] 1.04G 23.4MB/s eta 54s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "024-E0_1-enwiki-4k. 39%[======> ] 1.05G 23.1MB/s eta 53s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "24-E0_1-enwiki-4k.p 40%[=======> ] 1.05G 23.0MB/s eta 53s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "4-E0_1-enwiki-4k.pt 40%[=======> ] 1.06G 22.7MB/s eta 53s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-E0_1-enwiki-4k.pth 40%[=======> ] 1.06G 22.6MB/s eta 53s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "E0_1-enwiki-4k.pth 40%[=======> ] 1.07G 21.7MB/s eta 53s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "0_1-enwiki-4k.pth 40%[=======> ] 1.08G 23.4MB/s eta 53s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "_1-enwiki-4k.pth 41%[=======> ] 1.08G 23.5MB/s eta 53s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1-enwiki-4k.pth 41%[=======> ] 1.09G 23.9MB/s eta 53s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-enwiki-4k.pth 41%[=======> ] 1.09G 24.3MB/s eta 53s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "enwiki-4k.pth 41%[=======> ] 1.10G 24.0MB/s eta 53s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "nwiki-4k.pth 41%[=======> ] 1.10G 24.6MB/s eta 52s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "wiki-4k.pth 42%[=======> ] 1.11G 24.6MB/s eta 52s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "iki-4k.pth 42%[=======> ] 1.11G 25.2MB/s eta 52s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "ki-4k.pth 42%[=======> ] 1.12G 25.4MB/s eta 52s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "i-4k.pth 42%[=======> ] 1.12G 25.6MB/s eta 52s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-4k.pth 42%[=======> ] 1.13G 25.4MB/s eta 51s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "4k.pth 43%[=======> ] 1.13G 25.7MB/s eta 51s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "k.pth 43%[=======> ] 1.14G 26.2MB/s eta 51s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - ".pth 43%[=======> ] 1.14G 26.1MB/s eta 51s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "pth 43%[=======> ] 1.15G 27.7MB/s eta 51s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "th 43%[=======> ] 1.16G 26.7MB/s eta 50s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "h 44%[=======> ] 1.16G 26.6MB/s eta 50s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " 44%[=======> ] 1.17G 27.1MB/s eta 50s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v 44%[=======> ] 1.17G 26.9MB/s eta 50s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5 44%[=======> ] 1.18G 26.9MB/s eta 50s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5- 44%[=======> ] 1.18G 27.6MB/s eta 50s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L 45%[========> ] 1.19G 27.6MB/s eta 50s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L9 45%[========> ] 1.19G 27.4MB/s eta 50s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96 45%[========> ] 1.20G 27.2MB/s eta 50s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96- 45%[========> ] 1.21G 27.8MB/s eta 50s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D 46%[========> ] 1.21G 27.6MB/s eta 49s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1 46%[========> ] 1.22G 27.6MB/s eta 49s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D10 46%[========> ] 1.22G 27.5MB/s eta 49s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D102 46%[========> ] 1.23G 27.7MB/s eta 49s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024 46%[========> ] 1.23G 27.9MB/s eta 49s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024- 47%[========> ] 1.24G 27.5MB/s eta 48s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E 47%[========> ] 1.24G 27.7MB/s eta 48s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0 47%[========> ] 1.25G 27.9MB/s eta 48s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0_ 47%[========> ] 1.26G 27.9MB/s eta 48s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0_1 47%[========> ] 1.26G 28.0MB/s eta 48s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0_1- 48%[========> ] 1.27G 27.6MB/s eta 47s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "v5-L96-D1024-E0_1-e 48%[========> ] 1.27G 27.9MB/s eta 47s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "5-L96-D1024-E0_1-en 48%[========> ] 1.28G 28.1MB/s eta 47s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-L96-D1024-E0_1-enw 48%[========> ] 1.28G 28.2MB/s eta 47s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "L96-D1024-E0_1-enwi 48%[========> ] 1.29G 27.9MB/s eta 47s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "96-D1024-E0_1-enwik 49%[========> ] 1.29G 27.6MB/s eta 46s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "6-D1024-E0_1-enwiki 49%[========> ] 1.30G 27.8MB/s eta 46s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-D1024-E0_1-enwiki- 49%[========> ] 1.31G 28.1MB/s eta 46s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "D1024-E0_1-enwiki-4 49%[========> ] 1.31G 28.3MB/s eta 46s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1024-E0_1-enwiki-4k 50%[=========> ] 1.32G 27.8MB/s eta 46s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "024-E0_1-enwiki-4k. 50%[=========> ] 1.32G 28.0MB/s eta 45s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "24-E0_1-enwiki-4k.p 50%[=========> ] 1.33G 28.0MB/s eta 45s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "4-E0_1-enwiki-4k.pt 50%[=========> ] 1.33G 28.3MB/s eta 45s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-E0_1-enwiki-4k.pth 50%[=========> ] 1.34G 27.7MB/s eta 45s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "E0_1-enwiki-4k.pth 51%[=========> ] 1.34G 28.0MB/s eta 45s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "0_1-enwiki-4k.pth 51%[=========> ] 1.35G 27.8MB/s eta 44s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "_1-enwiki-4k.pth 51%[=========> ] 1.35G 28.1MB/s eta 44s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1-enwiki-4k.pth 51%[=========> ] 1.36G 28.1MB/s eta 44s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-enwiki-4k.pth 51%[=========> ] 1.37G 28.0MB/s eta 44s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "enwiki-4k.pth 52%[=========> ] 1.37G 28.0MB/s eta 44s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "nwiki-4k.pth 52%[=========> ] 1.38G 28.0MB/s eta 43s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "wiki-4k.pth 52%[=========> ] 1.38G 28.2MB/s eta 43s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "iki-4k.pth 52%[=========> ] 1.39G 28.1MB/s eta 43s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "ki-4k.pth 52%[=========> ] 1.39G 28.0MB/s eta 43s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "i-4k.pth 53%[=========> ] 1.40G 27.9MB/s eta 43s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-4k.pth 53%[=========> ] 1.41G 28.4MB/s eta 42s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "4k.pth 53%[=========> ] 1.41G 28.4MB/s eta 42s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "k.pth 53%[=========> ] 1.42G 28.4MB/s eta 42s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - ".pth 54%[=========> ] 1.42G 28.2MB/s eta 42s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "pth 54%[=========> ] 1.43G 28.8MB/s eta 42s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "th 54%[=========> ] 1.43G 28.4MB/s eta 41s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "h 54%[=========> ] 1.44G 28.3MB/s eta 41s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " 54%[=========> ] 1.45G 28.6MB/s eta 41s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v 55%[==========> ] 1.45G 28.5MB/s eta 41s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5 55%[==========> ] 1.46G 28.9MB/s eta 41s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5- 55%[==========> ] 1.46G 28.6MB/s eta 40s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L 55%[==========> ] 1.47G 28.9MB/s eta 40s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L9 56%[==========> ] 1.47G 28.1MB/s eta 40s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96 56%[==========> ] 1.48G 29.2MB/s eta 40s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96- 56%[==========> ] 1.49G 29.2MB/s eta 40s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D 56%[==========> ] 1.50G 29.6MB/s eta 39s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1 56%[==========> ] 1.50G 26.9MB/s eta 39s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D10 57%[==========> ] 1.50G 25.5MB/s eta 39s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D102 57%[==========> ] 1.52G 29.2MB/s eta 39s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024 57%[==========> ] 1.53G 29.8MB/s eta 38s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024- 58%[==========> ] 1.53G 30.0MB/s eta 38s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E 58%[==========> ] 1.54G 30.1MB/s eta 38s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0 58%[==========> ] 1.55G 30.6MB/s eta 38s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0_ 58%[==========> ] 1.55G 30.6MB/s eta 38s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0_1 59%[==========> ] 1.56G 31.0MB/s eta 37s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0_1- 59%[==========> ] 1.56G 31.2MB/s eta 37s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "v5-L96-D1024-E0_1-e 59%[==========> ] 1.57G 31.4MB/s eta 37s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "5-L96-D1024-E0_1-en 59%[==========> ] 1.58G 32.4MB/s eta 37s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-L96-D1024-E0_1-enw 60%[===========> ] 1.58G 31.7MB/s eta 37s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "L96-D1024-E0_1-enwi 60%[===========> ] 1.59G 32.4MB/s eta 36s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "96-D1024-E0_1-enwik 60%[===========> ] 1.60G 32.2MB/s eta 36s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "6-D1024-E0_1-enwiki 61%[===========> ] 1.61G 32.4MB/s eta 36s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-D1024-E0_1-enwiki- 61%[===========> ] 1.61G 35.4MB/s eta 36s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "D1024-E0_1-enwiki-4 61%[===========> ] 1.62G 37.4MB/s eta 36s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1024-E0_1-enwiki-4k 61%[===========> ] 1.63G 35.1MB/s eta 34s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "024-E0_1-enwiki-4k. 62%[===========> ] 1.64G 34.3MB/s eta 34s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "24-E0_1-enwiki-4k.p 62%[===========> ] 1.64G 34.4MB/s eta 34s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "4-E0_1-enwiki-4k.pt 62%[===========> ] 1.65G 34.7MB/s eta 34s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-E0_1-enwiki-4k.pth 63%[===========> ] 1.66G 34.9MB/s eta 34s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "E0_1-enwiki-4k.pth 63%[===========> ] 1.67G 35.5MB/s eta 33s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "0_1-enwiki-4k.pth 63%[===========> ] 1.68G 36.0MB/s eta 33s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "_1-enwiki-4k.pth 64%[===========> ] 1.68G 36.0MB/s eta 33s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1-enwiki-4k.pth 64%[===========> ] 1.69G 36.3MB/s eta 33s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-enwiki-4k.pth 64%[===========> ] 1.70G 37.0MB/s eta 33s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "enwiki-4k.pth 64%[===========> ] 1.71G 37.1MB/s eta 31s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "nwiki-4k.pth 65%[============> ] 1.72G 37.4MB/s eta 31s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "wiki-4k.pth 65%[============> ] 1.73G 38.3MB/s eta 31s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "iki-4k.pth 65%[============> ] 1.73G 38.2MB/s eta 31s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "ki-4k.pth 66%[============> ] 1.74G 38.6MB/s eta 31s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "i-4k.pth 66%[============> ] 1.75G 38.7MB/s eta 30s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-4k.pth 66%[============> ] 1.76G 39.3MB/s eta 30s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "4k.pth 67%[============> ] 1.77G 39.1MB/s eta 30s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "k.pth 67%[============> ] 1.77G 39.4MB/s eta 30s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - ".pth 67%[============> ] 1.78G 39.2MB/s eta 30s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "pth 67%[============> ] 1.79G 39.1MB/s eta 28s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "th 68%[============> ] 1.80G 39.6MB/s eta 28s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "h 68%[============> ] 1.81G 39.4MB/s eta 28s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " 68%[============> ] 1.81G 39.6MB/s eta 28s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v 69%[============> ] 1.82G 39.2MB/s eta 28s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5 69%[============> ] 1.83G 39.9MB/s eta 27s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5- 69%[============> ] 1.84G 39.9MB/s eta 27s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L 70%[=============> ] 1.85G 39.3MB/s eta 27s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L9 70%[=============> ] 1.86G 39.4MB/s eta 27s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96 70%[=============> ] 1.86G 39.7MB/s eta 27s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96- 71%[=============> ] 1.87G 39.2MB/s eta 25s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D 71%[=============> ] 1.88G 38.6MB/s eta 25s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1 71%[=============> ] 1.88G 37.8MB/s eta 25s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D10 72%[=============> ] 1.90G 39.4MB/s eta 25s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D102 72%[=============> ] 1.91G 39.5MB/s eta 25s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024 72%[=============> ] 1.91G 39.7MB/s eta 24s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024- 73%[=============> ] 1.92G 39.6MB/s eta 24s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E 73%[=============> ] 1.93G 39.7MB/s eta 24s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0 73%[=============> ] 1.94G 39.1MB/s eta 24s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0_ 73%[=============> ] 1.94G 38.4MB/s eta 24s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0_1 74%[=============> ] 1.95G 38.4MB/s eta 22s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0_1- 74%[=============> ] 1.96G 38.2MB/s eta 22s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "v5-L96-D1024-E0_1-e 74%[=============> ] 1.97G 37.8MB/s eta 22s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "5-L96-D1024-E0_1-en 74%[=============> ] 1.97G 37.3MB/s eta 22s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-L96-D1024-E0_1-enw 75%[==============> ] 1.98G 37.0MB/s eta 22s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "L96-D1024-E0_1-enwi 75%[==============> ] 1.99G 37.1MB/s eta 21s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "96-D1024-E0_1-enwik 75%[==============> ] 2.00G 36.8MB/s eta 21s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "6-D1024-E0_1-enwiki 76%[==============> ] 2.00G 38.0MB/s eta 21s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-D1024-E0_1-enwiki- 76%[==============> ] 2.01G 37.6MB/s eta 21s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "D1024-E0_1-enwiki-4 76%[==============> ] 2.02G 35.5MB/s eta 21s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1024-E0_1-enwiki-4k 76%[==============> ] 2.02G 35.6MB/s eta 20s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "024-E0_1-enwiki-4k. 77%[==============> ] 2.03G 35.2MB/s eta 20s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "24-E0_1-enwiki-4k.p 77%[==============> ] 2.04G 35.3MB/s eta 20s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "4-E0_1-enwiki-4k.pt 77%[==============> ] 2.04G 34.9MB/s eta 20s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-E0_1-enwiki-4k.pth 78%[==============> ] 2.05G 35.0MB/s eta 20s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "E0_1-enwiki-4k.pth 78%[==============> ] 2.06G 35.4MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "0_1-enwiki-4k.pth 78%[==============> ] 2.07G 35.8MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "_1-enwiki-4k.pth 78%[==============> ] 2.08G 36.2MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1-enwiki-4k.pth 79%[==============> ] 2.08G 35.8MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-enwiki-4k.pth 79%[==============> ] 2.09G 36.7MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "enwiki-4k.pth 79%[==============> ] 2.10G 37.0MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "nwiki-4k.pth 80%[===============> ] 2.11G 37.1MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "wiki-4k.pth 80%[===============> ] 2.12G 37.7MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "iki-4k.pth 80%[===============> ] 2.12G 37.6MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "ki-4k.pth 81%[===============> ] 2.13G 38.3MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "i-4k.pth 81%[===============> ] 2.14G 38.7MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-4k.pth 81%[===============> ] 2.15G 39.0MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "4k.pth 81%[===============> ] 2.16G 38.6MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "k.pth 82%[===============> ] 2.17G 39.2MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - ".pth 82%[===============> ] 2.17G 39.1MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "pth 82%[===============> ] 2.18G 39.1MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "th 83%[===============> ] 2.19G 39.5MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "h 83%[===============> ] 2.20G 39.5MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " 83%[===============> ] 2.21G 39.5MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v 84%[===============> ] 2.21G 39.1MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5 84%[===============> ] 2.22G 39.0MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5- 84%[===============> ] 2.23G 39.7MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L 84%[===============> ] 2.24G 38.7MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L9 85%[================> ] 2.25G 39.7MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96 85%[================> ] 2.25G 39.5MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96- 85%[================> ] 2.26G 39.5MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D 86%[================> ] 2.27G 39.3MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1 86%[================> ] 2.28G 40.2MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D10 86%[================> ] 2.29G 39.6MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D102 87%[================> ] 2.29G 39.4MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024 87%[================> ] 2.30G 39.2MB/s eta 11s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024- 87%[================> ] 2.31G 39.2MB/s eta 11s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E 88%[================> ] 2.32G 40.2MB/s eta 11s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0 88%[================> ] 2.33G 39.8MB/s eta 11s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0_ 88%[================> ] 2.34G 40.1MB/s eta 11s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0_1 89%[================> ] 2.34G 39.9MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L96-D1024-E0_1- 89%[================> ] 2.35G 39.8MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "v5-L96-D1024-E0_1-e 89%[================> ] 2.36G 39.9MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "5-L96-D1024-E0_1-en 90%[=================> ] 2.37G 39.4MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-L96-D1024-E0_1-enw 90%[=================> ] 2.38G 40.0MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "L96-D1024-E0_1-enwi 90%[=================> ] 2.39G 40.0MB/s eta 8s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "96-D1024-E0_1-enwik 91%[=================> ] 2.39G 40.6MB/s eta 8s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "6-D1024-E0_1-enwiki 91%[=================> ] 2.40G 40.4MB/s eta 8s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-D1024-E0_1-enwiki- 91%[=================> ] 2.41G 39.6MB/s eta 8s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "D1024-E0_1-enwiki-4 91%[=================> ] 2.42G 40.8MB/s eta 8s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1024-E0_1-enwiki-4k 92%[=================> ] 2.43G 40.7MB/s eta 6s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "024-E0_1-enwiki-4k. 92%[=================> ] 2.44G 40.6MB/s eta 6s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "24-E0_1-enwiki-4k.p 92%[=================> ] 2.44G 40.2MB/s eta 6s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "4-E0_1-enwiki-4k.pt 93%[=================> ] 2.45G 40.9MB/s eta 6s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-E0_1-enwiki-4k.pth 93%[=================> ] 2.46G 40.9MB/s eta 6s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "E0_1-enwiki-4k.pth 93%[=================> ] 2.47G 40.6MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "0_1-enwiki-4k.pth 94%[=================> ] 2.48G 40.9MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "_1-enwiki-4k.pth 94%[=================> ] 2.49G 41.2MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1-enwiki-4k.pth 94%[=================> ] 2.49G 40.8MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-enwiki-4k.pth 95%[==================> ] 2.50G 40.5MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "enwiki-4k.pth 95%[==================> ] 2.51G 40.6MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "nwiki-4k.pth 95%[==================> ] 2.52G 40.0MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "wiki-4k.pth 96%[==================> ] 2.53G 41.1MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "iki-4k.pth 96%[==================> ] 2.54G 40.8MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "ki-4k.pth 96%[==================> ] 2.54G 40.7MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "i-4k.pth 96%[==================> ] 2.55G 40.6MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-4k.pth 97%[==================> ] 2.56G 40.7MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "4k.pth 97%[==================> ] 2.57G 40.6MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "k.pth 97%[==================> ] 2.58G 40.5MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - ".pth 98%[==================> ] 2.59G 40.6MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "pth 98%[==================> ] 2.60G 40.6MB/s eta 1s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "th 98%[==================> ] 2.60G 41.3MB/s eta 1s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "h 99%[==================> ] 2.61G 40.9MB/s eta 1s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " 99%[==================> ] 2.62G 40.5MB/s eta 1s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v 99%[==================> ] 2.63G 40.5MB/s eta 1s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "v5-L96-D1024-E0_1-e 100%[===================>] 2.63G 41.1MB/s in 82s \r\n", - "\r\n", - "2023-08-30 08:23:49 (32.8 MB/s) - ‘v5-L96-D1024-E0_1-enwiki-4k.pth’ saved [2825974771/2825974771]\r\n", - "\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "total 2.7G\r\n", - "drwxr-xr-x 2 root root 53 Aug 30 08:22 .\r\n", - "drwxr-xr-x 19 root root 4.0K Aug 30 08:22 ..\r\n", - "-rw-r--r-- 1 root root 2.7G Aug 30 08:18 v5-L96-D1024-E0_1-enwiki-4k.pth\r\n" - ] - } - ], - "source": [ - "# Download the model directly (stop gap till HF sync issues is resolved)\n", - "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n", - " wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-memory/{FILENAME_PREFIX}-enwiki-4k.pth\"\n", - "\n", - "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n", - " ls -alh ." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "0196f6c6", - "metadata": { - "papermill": { - "duration": 0.021608, - "end_time": "2023-08-30T08:23:49.858882", - "exception": false, - "start_time": "2023-08-30T08:23:49.837274", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "# Enwiki Stage 2 : Basic Instruct Tuning" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "866383a3", - "metadata": { - "execution": { - "iopub.execute_input": "2023-08-30T08:23:49.903977Z", - "iopub.status.busy": "2023-08-30T08:23:49.903724Z", - "iopub.status.idle": "2023-08-30T08:23:57.329655Z", - "shell.execute_reply": "2023-08-30T08:23:57.328808Z" - }, - "papermill": { - "duration": 7.450784, - "end_time": "2023-08-30T08:23:57.331109", - "exception": false, - "start_time": "2023-08-30T08:23:49.880325", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found cached dataset parquet (/actions-runner/.cache/huggingface/datasets/c-s-ale___parquet/c-s-ale--dolly-15k-instruction-alpaca-format-9dfbb23260d63d9d/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)\r\n", - "\r", - " 0%| | 0/1 [00:00=12.1), as this is known to have freeze issues\r\n", - "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\r\n", - "# - When resuming from checkpoint, the estimated time is inaccurate\r\n", - "#\r\n", - "\r\n", - "[RWKV.model] Configuring optimizer with\r\n", - " - lr_init: 4.000e-04 (0.0004)\r\n", - " - lr_final: 3.000e-04 (0.0003)\r\n", - "\r\n", - "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Detected CUDA files, patching ldflags\r\n", - "Emitting ninja build file /root/.cache/torch_extensions/py311_cu118/fused_adam/build.ninja...\r\n", - "Building extension module fused_adam...\r\n", - "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\r\n", - "ninja: no work to do.\r\n", - "Loading extension module fused_adam...\r\n", - "Time to load fused_adam op: 0.06790018081665039 seconds\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loading extension module fused_adam...\r\n", - "Loading extension module fused_adam...\r\n", - "Loading extension module fused_adam...\r\n", - "Loading extension module fused_adam...\r\n", - "Loading extension module fused_adam...\r\n", - "Time to load fused_adam op: 0.10168147087097168 seconds\r\n", - "Time to load fused_adam op: 0.10191583633422852 seconds\r\n", - "Time to load fused_adam op: 0.10190057754516602 seconds\r\n", - "Time to load fused_adam op: 0.10140633583068848 seconds\r\n", - "Loading extension module fused_adam...\r\n", - "Time to load fused_adam op: 0.10164237022399902 seconds\r\n", - "Time to load fused_adam op: 0.10190320014953613 seconds\r\n", - "Loading `train_dataloader` to estimate number of stepping batches.\r\n", - "Loading extension module fused_adam...\r\n", - "Time to load fused_adam op: 0.10156393051147461 seconds\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Emitting ninja build file /root/.cache/torch_extensions/py311_cu118/utils/build.ninja...\r\n", - "Building extension module utils...\r\n", - "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ninja: no work to do.\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.07002925872802734 seconds\r\n", - "Loading extension module utils...\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loading extension module utils...\r\n", - "Time to load utils op: 0.10284900665283203 seconds\r\n", - "Time to load utils op: 0.10333824157714844 seconds\r\n", - "Loading extension module utils...\r\n", - "Loading extension module utils...\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.10248160362243652 seconds\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.10256314277648926 seconds\r\n", - "Time to load utils op: 0.10243940353393555 seconds\r\n", - "Time to load utils op: 0.10254120826721191 seconds\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.10288691520690918 seconds\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rank: 6 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rank: 2 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n", - "Rank: 7 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rank: 3 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rank: 0 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rank: 5 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rank: 4 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rank: 1 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "No modifications detected for re-loaded extension module utils, skipping build step...\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.0014386177062988281 seconds\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "No modifications detected for re-loaded extension module utils, skipping build step...\r\n", - "Loading extension module utils...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Time to load utils op: 0.0006914138793945312 seconds\r\n", - "No modifications detected for re-loaded extension module utils, skipping build step...\r\n", - "Loading extension module utils...\r\n", - "No modifications detected for re-loaded extension module utils, skipping build step...\r\n", - "Loading extension module utils...\r\n", - "No modifications detected for re-loaded extension module utils, skipping build step...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.0006327629089355469 seconds\r\n", - "Time to load utils op: 0.0006318092346191406 seconds\r\n", - "Time to load utils op: 0.0006728172302246094 seconds\r\n", - "No modifications detected for re-loaded extension module utils, skipping build step...\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.0006492137908935547 seconds\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "No modifications detected for re-loaded extension module utils, skipping build step...\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.0009899139404296875 seconds\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "No modifications detected for re-loaded extension module utils, skipping build step...\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.0010592937469482422 seconds\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r\n", - " | Name | Type | Params\r\n", - "--------------------------------------\r\n", - "0 | emb | Embedding | 51.5 M\r\n", - "1 | blocks | ModuleList | 1.3 B \r\n", - "2 | ln_out | LayerNorm | 2.0 K \r\n", - "3 | head | Linear | 51.5 M\r\n", - "--------------------------------------\r\n", - "1.4 B Trainable params\r\n", - "0 Non-trainable params\r\n", - "1.4 B Total params\r\n", - "5,650.715 Total estimated model params size (MB)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "Training: 0it [00:00, ?it/s]\r", - "Training: 0%| | 0/1867 [00:00=12.1), as this is known to have freeze issues\r\n", - "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\r\n", - "# - When resuming from checkpoint, the estimated time is inaccurate\r\n", - "#\r\n", - "\r\n", - "[RWKV.model] Configuring optimizer with\r\n", - " - lr_init: 8.000e-04 (0.0008)\r\n", - " - lr_final: 5.000e-04 (0.0005)\r\n", - "\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Detected CUDA files, patching ldflags\r\n", - "Emitting ninja build file /root/.cache/torch_extensions/py311_cu118/fused_adam/build.ninja...\r\n", - "Building extension module fused_adam...\r\n", - "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ninja: no work to do.\r\n", - "Loading extension module fused_adam...\r\n", - "Time to load fused_adam op: 0.07258319854736328 seconds\r\n", - "Loading extension module fused_adam...\r\n", - "Loading extension module fused_adam...\r\n", - "Loading extension module fused_adam...\r\n", - "Loading extension module fused_adam...\r\n", - "Time to load fused_adam op: 0.1014254093170166 seconds\r\n", - "Time to load fused_adam op: 0.1013331413269043 seconds\r\n", - "Time to load fused_adam op: 0.10129237174987793 seconds\r\n", - "Time to load fused_adam op: 0.1013648509979248 seconds\r\n", - "Loading extension module fused_adam...\r\n", - "Loading extension module fused_adam...\r\n", - "Loading extension module fused_adam...\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time to load fused_adam op: 0.10182476043701172 seconds\r\n", - "Time to load fused_adam op: 0.10165524482727051 seconds\r\n", - "Time to load fused_adam op: 0.10207104682922363 seconds\r\n", - "Loading `train_dataloader` to estimate number of stepping batches.\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Emitting ninja build file /root/.cache/torch_extensions/py311_cu118/utils/build.ninja...\r\n", - "Building extension module utils...\r\n", - "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ninja: no work to do.\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.0719151496887207 seconds\r\n", - "Loading extension module utils...\r\n", - "Loading extension module utils...\r\n", - "Loading extension module utils...\r\n", - "Loading extension module utils...\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.10239577293395996 seconds\r\n", - "Time to load utils op: 0.1021265983581543 seconds\r\n", - "Time to load utils op: 0.10312938690185547 seconds\r\n", - "Time to load utils op: 0.10286855697631836 seconds\r\n", - "Time to load utils op: 0.10381817817687988 seconds\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loading extension module utils...\r\n", - "Time to load utils op: 0.10255122184753418 seconds\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.10361599922180176 seconds\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rank: 4 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rank: 6 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rank: 7 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rank: 1 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rank: 5 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rank: 3 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rank: 2 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rank: 0 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "No modifications detected for re-loaded extension module utils, skipping build step...\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.0006549358367919922 seconds\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "No modifications detected for re-loaded extension module utils, skipping build step...\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.0008122920989990234 seconds\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "No modifications detected for re-loaded extension module utils, skipping build step...\r\n", - "Loading extension module utils...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Time to load utils op: 0.0006814002990722656 seconds\r\n", - "No modifications detected for re-loaded extension module utils, skipping build step...\r\n", - "Loading extension module utils...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Time to load utils op: 0.0007977485656738281 seconds\r\n", - "No modifications detected for re-loaded extension module utils, skipping build step...\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.0012519359588623047 seconds\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "No modifications detected for re-loaded extension module utils, skipping build step...\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.0010423660278320312 seconds\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "No modifications detected for re-loaded extension module utils, skipping build step...\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.00095367431640625 seconds\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "No modifications detected for re-loaded extension module utils, skipping build step...\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.0009925365447998047 seconds\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r\n", - " | Name | Type | Params\r\n", - "--------------------------------------\r\n", - "0 | emb | Embedding | 51.5 M\r\n", - "1 | blocks | ModuleList | 1.3 B \r\n", - "2 | ln_out | LayerNorm | 2.0 K \r\n", - "3 | head | Linear | 51.5 M\r\n", - "--------------------------------------\r\n", - "1.4 B Trainable params\r\n", - "0 Non-trainable params\r\n", - "1.4 B Total params\r\n", - "5,650.715 Total estimated model params size (MB)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "Training: 0it [00:00, ?it/s]\r", - "Training: 0%| | 0/4371 [00:00=12.1), as this is known to have freeze issues\r\n", - "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\r\n", - "# - When resuming from checkpoint, the estimated time is inaccurate\r\n", - "#\r\n", - "\r\n", - "[RWKV.model] Configuring optimizer with\r\n", - " - lr_init: 5.000e-04 (0.0005)\r\n", - " - lr_final: 4.000e-04 (0.0004)\r\n", - "\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Detected CUDA files, patching ldflags\r\n", - "Emitting ninja build file /root/.cache/torch_extensions/py311_cu118/fused_adam/build.ninja...\r\n", - "Building extension module fused_adam...\r\n", - "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ninja: no work to do.\r\n", - "Loading extension module fused_adam...\r\n", - "Time to load fused_adam op: 0.0751490592956543 seconds\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loading extension module fused_adam...\r\n", - "Loading extension module fused_adam...\r\n", - "Time to load fused_adam op: 0.10148930549621582 seconds\r\n", - "Loading extension module fused_adam...\r\n", - "Loading extension module fused_adam...\r\n", - "Time to load fused_adam op: 0.10161519050598145 seconds\r\n", - "Loading extension module fused_adam...\r\n", - "Loading extension module fused_adam...\r\n", - "Time to load fused_adam op: 0.10132598876953125 seconds\r\n", - "Time to load fused_adam op: 0.10149168968200684 seconds\r\n", - "Time to load fused_adam op: 0.1014404296875 seconds\r\n", - "Time to load fused_adam op: 0.10174369812011719 seconds\r\n", - "Loading `train_dataloader` to estimate number of stepping batches.\r\n", - "Loading extension module fused_adam...\r\n", - "Time to load fused_adam op: 0.10146427154541016 seconds\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Emitting ninja build file /root/.cache/torch_extensions/py311_cu118/utils/build.ninja...\r\n", - "Building extension module utils...\r\n", - "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\r\n", - "ninja: no work to do.\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.07143139839172363 seconds\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loading extension module utils...\r\n", - "Time to load utils op: 0.10282611846923828 seconds\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.10311031341552734 seconds\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.10220694541931152 seconds\r\n", - "Loading extension module utils...\r\n", - "Loading extension module utils...\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.10280084609985352 seconds\r\n", - "Time to load utils op: 0.10232162475585938 seconds\r\n", - "Time to load utils op: 0.10236144065856934 seconds\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.10261821746826172 seconds\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rank: 2 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rank: 1 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rank: 6 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rank: 7 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rank: 5 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rank: 3 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rank: 4 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rank: 0 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "No modifications detected for re-loaded extension module utils, skipping build step...\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.0006723403930664062 seconds\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "No modifications detected for re-loaded extension module utils, skipping build step...\r\n", - "Loading extension module utils...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Time to load utils op: 0.0006999969482421875 seconds\r\n", - "No modifications detected for re-loaded extension module utils, skipping build step...\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.0006632804870605469 seconds\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "No modifications detected for re-loaded extension module utils, skipping build step...\r\n", - "Loading extension module utils...\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "Time to load utils op: 0.0006909370422363281 seconds\r\n", - "No modifications detected for re-loaded extension module utils, skipping build step...\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.0007290840148925781 seconds\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "No modifications detected for re-loaded extension module utils, skipping build step...\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.0009744167327880859 seconds\r\n", - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "No modifications detected for re-loaded extension module utils, skipping build step...\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.0007312297821044922 seconds\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n", - "No modifications detected for re-loaded extension module utils, skipping build step...\r\n", - "Loading extension module utils...\r\n", - "Time to load utils op: 0.0010111331939697266 seconds\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r\n", - " | Name | Type | Params\r\n", - "--------------------------------------\r\n", - "0 | emb | Embedding | 51.5 M\r\n", - "1 | blocks | ModuleList | 1.3 B \r\n", - "2 | ln_out | LayerNorm | 2.0 K \r\n", - "3 | head | Linear | 51.5 M\r\n", - "--------------------------------------\r\n", - "1.4 B Trainable params\r\n", - "0 Non-trainable params\r\n", - "1.4 B Total params\r\n", - "5,650.715 Total estimated model params size (MB)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "Training: 0it [00:00, ?it/s]\r", - "Training: 0%| | 0/16029 [00:00