diff --git "a/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb" "b/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb" --- "a/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb" +++ "b/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb" @@ -1,3 +1,140490 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0db5673fbf59261a65cafb957510a87538f738b6cebffd10ed532db38dfdcb01 -size 53132732 +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "3174f701", + "metadata": { + "papermill": { + "duration": 0.005065, + "end_time": "2023-09-06T17:12:47.606560", + "exception": false, + "start_time": "2023-09-06T17:12:47.601495", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# RWKV v5\n", + "\n", + "Simple memory training for a small model\n", + "\n", + "**Note:** This project assumes you have the rwkv-infctx conda env setup" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "cb5debdd", + "metadata": { + "papermill": { + "duration": 0.002115, + "end_time": "2023-09-06T17:12:47.612606", + "exception": false, + "start_time": "2023-09-06T17:12:47.610491", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Basic Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "41bbf98d", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:12:47.618377Z", + "iopub.status.busy": "2023-09-06T17:12:47.618157Z", + "iopub.status.idle": "2023-09-06T17:12:48.494513Z", + "shell.execute_reply": "2023-09-06T17:12:48.493600Z" + }, + "papermill": { + "duration": 0.881639, + "end_time": "2023-09-06T17:12:48.496472", + "exception": false, + "start_time": "2023-09-06T17:12:47.614833", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CITATION.cff RWKV-v4wavenet\t RWKV-v5headsize2x checkpoint\tnotebook\r\n", + "LICENSE RWKV-v5\t\t 
RWKV-v5headsize32 datapath\toutput\r\n", + "README.md RWKV-v5-beta2\t RWKV-v5rstack\t docker\r\n", + "RWKV-v4neo RWKV-v5altwavenet RWKV-v5wavenet model\r\n" + ] + } + ], + "source": [ + "# First lets setup the various directories, and init the model\n", + "!ls ../../../../../\n", + "!mkdir -p ../../../../../model/\n", + "!mkdir -p ../../../../../datapath/\n", + "!mkdir -p ../../../../../checkpoint/" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "bc308e46", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:12:48.506904Z", + "iopub.status.busy": "2023-09-06T17:12:48.506660Z", + "iopub.status.idle": "2023-09-06T17:12:50.610312Z", + "shell.execute_reply": "2023-09-06T17:12:50.609442Z" + }, + "papermill": { + "duration": 2.110898, + "end_time": "2023-09-06T17:12:50.612132", + "exception": false, + "start_time": "2023-09-06T17:12:48.501234", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\r\n", + "\u001b[0m" + ] + } + ], + "source": [ + "# Additional dependencies for eval stuff\n", + "!pip install -q aiocsv aiofiles" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "5ecce62b", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:12:50.622765Z", + "iopub.status.busy": "2023-09-06T17:12:50.622510Z", + "iopub.status.idle": "2023-09-06T17:12:50.631551Z", + "shell.execute_reply": "2023-09-06T17:12:50.630955Z" + }, + "papermill": { + "duration": 0.01615, + "end_time": "2023-09-06T17:12:50.633066", + "exception": false, + "start_time": "2023-09-06T17:12:50.616916", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DEEPSPEED_STRAT: deepspeed_stage_1\n", + "ENABLE_WANDB: True\n", + "GPU_DEVICES: auto\n", + "DIR_NAME: L12-D2048-E1e-1-ctx4k\n", + "NOTEBOOK_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k\n", + "INFERENCE_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", + "TRAINER_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", + "PROJECT_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer\n" + ] + } + ], + "source": [ + "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n", + "GPU_DEVICES=\"auto\"\n", + "ENABLE_WANDB=True\n", + "\n", + "# Layer count and embed dim to start with\n", + "LAYER_COUNT=12\n", + "EMBED_DIM=2048\n", + "\n", + "EMBED_SCALE=0.1\n", + "EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(\".\", \"_\")\n", + "\n", + "WANDB_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE}\"\n", + "FILENAME_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE_LABEL}\"\n", + "\n", + "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n", + "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", + 
"print(\"GPU_DEVICES:\", GPU_DEVICES)\n", + "\n", + "if ENABLE_WANDB:\n", + " WANDB_MODE=\"online\"\n", + "else:\n", + " WANDB_MODE=\"disabled\"\n", + "\n", + "# Computing the notebook, and various paths\n", + "import os\n", + "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", + "CONFIG_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../\"))\n", + "PROJECT_DIR=os.path.abspath(os.path.join(CONFIG_DIR, \"../../../../\"))\n", + "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "INFERENCE_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "\n", + "# Get the notebook dir name\n", + "DIR_NAME=os.path.basename(NOTEBOOK_DIR)\n", + "\n", + "# Log names and dir\n", + "print(\"DIR_NAME:\", DIR_NAME)\n", + "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", + "print(\"INFERENCE_DIR:\", INFERENCE_DIR)\n", + "print(\"TRAINER_DIR:\", TRAINER_DIR)\n", + "print(\"PROJECT_DIR:\", PROJECT_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "ecee273d", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:12:50.643057Z", + "iopub.status.busy": "2023-09-06T17:12:50.642933Z", + "iopub.status.idle": "2023-09-06T17:13:06.898900Z", + "shell.execute_reply": "2023-09-06T17:13:06.898162Z" + }, + "papermill": { + "duration": 16.262552, + "end_time": "2023-09-06T17:13:06.900660", + "exception": false, + "start_time": "2023-09-06T17:12:50.638108", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2023-09-06 17:12:50-- https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-enwiki-4k.pth\r\n", + "Resolving huggingface.co (huggingface.co)... 13.33.33.55, 13.33.33.102, 13.33.33.110, ...\r\n", + "Connecting to huggingface.co (huggingface.co)|13.33.33.55|:443... connected.\r\n", + "HTTP request sent, awaiting response... 
" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "302 Found\r\n", + "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/fcd2c54e435c74dc2a43bd3bbde6594de9c6937156caf9f72a77137ed3d49539?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L12-D2048-E0_1-enwiki-4k.pth%3B+filename%3D%22v5r3-L12-D2048-E0_1-enwiki-4k.pth%22%3B&Expires=1694279570&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDI3OTU3MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkL2ZjZDJjNTRlNDM1Yzc0ZGMyYTQzYmQzYmJkZTY1OTRkZTljNjkzNzE1NmNhZjlmNzJhNzcxMzdlZDNkNDk1Mzk%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=vlJUg9zPT-Ie2MebXI3t7Sfovkvk30xvNya0WqXvAogwISiGWpmGNd3IKa0rDNdEdrQ3uREbJSFhcam12E5VepvwzlhCsUFsI4W9YnOQ8JOVAtNH5fzk16zGizK7%7EtmvJszRMbwukNZOp6TGz4kqEQPgwAwv26tPs9mP2ATP59hiH30jVnK1yjYot7Y2UAC6vKBdF3%7E%7EZUsL-ZfcYL0lTLE7xPmtgafMs3DM-TJhA1wPXw2r-ByBDo2l6edDKcosW36ncjch5kT5XXrnmxEhX4Yll0kAYuwvfXZI2AsIfeopfeKyYhg0KKeAwrPaxHzAcfQSHQn%7EVIjtW-Ro-8XAUw__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n", + "--2023-09-06 17:12:51-- 
https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/fcd2c54e435c74dc2a43bd3bbde6594de9c6937156caf9f72a77137ed3d49539?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L12-D2048-E0_1-enwiki-4k.pth%3B+filename%3D%22v5r3-L12-D2048-E0_1-enwiki-4k.pth%22%3B&Expires=1694279570&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDI3OTU3MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkL2ZjZDJjNTRlNDM1Yzc0ZGMyYTQzYmQzYmJkZTY1OTRkZTljNjkzNzE1NmNhZjlmNzJhNzcxMzdlZDNkNDk1Mzk%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=vlJUg9zPT-Ie2MebXI3t7Sfovkvk30xvNya0WqXvAogwISiGWpmGNd3IKa0rDNdEdrQ3uREbJSFhcam12E5VepvwzlhCsUFsI4W9YnOQ8JOVAtNH5fzk16zGizK7%7EtmvJszRMbwukNZOp6TGz4kqEQPgwAwv26tPs9mP2ATP59hiH30jVnK1yjYot7Y2UAC6vKBdF3%7E%7EZUsL-ZfcYL0lTLE7xPmtgafMs3DM-TJhA1wPXw2r-ByBDo2l6edDKcosW36ncjch5kT5XXrnmxEhX4Yll0kAYuwvfXZI2AsIfeopfeKyYhg0KKeAwrPaxHzAcfQSHQn%7EVIjtW-Ro-8XAUw__&Key-Pair-Id=KVTP0A1DKRTAX\r\n", + "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18.155.68.98, 18.155.68.128, 18.155.68.94, ...\r\n", + "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|18.155.68.98|:443... connected.\r\n", + "HTTP request sent, awaiting response... 
200 OK\r\n", + "Length: 1721187013 (1.6G) [binary/octet-stream]\r\n", + "Saving to: ‘v5r3-L12-D2048-E0_1-enwiki-4k.pth’\r\n", + "\r\n", + "\r", + " v5r3-L12- 0%[ ] 0 --.-KB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D 1%[ ] 21.42M 107MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2 2%[ ] 43.83M 110MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D20 4%[ ] 66.17M 110MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D204 5%[> ] 88.57M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048 6%[> ] 110.92M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048- 8%[> ] 133.36M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048-E 9%[> ] 155.76M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048-E0 10%[=> ] 178.16M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048-E0_ 12%[=> ] 200.56M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L12-D2048-E0_1 13%[=> ] 222.97M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5r3-L12-D2048-E0_1- 14%[=> ] 244.89M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "r3-L12-D2048-E0_1-e 16%[==> ] 267.32M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "3-L12-D2048-E0_1-en 17%[==> ] 289.71M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-L12-D2048-E0_1-enw 19%[==> ] 312.11M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "\r", + "L12-D2048-E0_1-enwi 20%[===> ] 334.51M 111MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "12-D2048-E0_1-enwik 21%[===> ] 356.91M 112MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2-D2048-E0_1-enwiki 23%[===> ] 379.31M 112MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D2048-E0_1-enwiki- 24%[===> ] 401.72M 112MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D2048-E0_1-enwiki-4 25%[====> ] 424.13M 112MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2048-E0_1-enwiki-4k 27%[====> ] 446.53M 112MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "048-E0_1-enwiki-4k. 28%[====> ] 468.94M 112MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "48-E0_1-enwiki-4k.p 29%[====> ] 491.34M 112MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "8-E0_1-enwiki-4k.pt 31%[=====> ] 513.75M 112MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-E0_1-enwiki-4k.pth 32%[=====> ] 536.15M 112MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "E0_1-enwiki-4k.pth 34%[=====> ] 558.51M 112MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0_1-enwiki-4k.pth 35%[======> ] 580.91M 112MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "_1-enwiki-4k.pth 36%[======> ] 603.30M 112MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "1-enwiki-4k.pth 38%[======> ] 625.71M 112MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", 
+ "-enwiki-4k.pth 39%[======> ] 648.10M 112MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "enwiki-4k.pth 40%[=======> ] 670.50M 112MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "nwiki-4k.pth 42%[=======> ] 692.78M 112MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "wiki-4k.pth 43%[=======> ] 715.16M 112MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "iki-4k.pth 44%[=======> ] 737.57M 112MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "ki-4k.pth 46%[========> ] 759.99M 112MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "i-4k.pth 47%[========> ] 782.39M 112MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-4k.pth 49%[========> ] 804.80M 112MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "4k.pth 50%[=========> ] 827.18M 112MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "k.pth 51%[=========> ] 849.60M 112MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + ".pth 53%[=========> ] 872.00M 112MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "pth 54%[=========> ] 894.41M 112MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "th 55%[==========> ] 916.82M 112MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "h 57%[==========> ] 939.22M 112MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " 58%[==========> ] 961.63M 112MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v 
59%[==========> ] 984.03M 112MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5 61%[===========> ] 1006M 112MB/s eta 6s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r 62%[===========> ] 1.00G 112MB/s eta 6s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3 64%[===========> ] 1.03G 112MB/s eta 6s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3- 65%[============> ] 1.05G 112MB/s eta 6s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L 66%[============> ] 1.07G 112MB/s eta 6s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L1 68%[============> ] 1.09G 112MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12 68%[============> ] 1.09G 105MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12- 69%[============> ] 1.11G 103MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D 70%[=============> ] 1.13G 101MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2 71%[=============> ] 1.14G 98.7MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D20 72%[=============> ] 1.16G 96.7MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D204 73%[=============> ] 1.17G 93.8MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048 74%[=============> ] 1.19G 93.4MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048- 75%[==============> ] 1.21G 91.2MB/s eta 4s " + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048-E 76%[==============> ] 1.22G 89.5MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048-E0 77%[==============> ] 1.24G 89.3MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048-E0_ 78%[==============> ] 1.26G 85.2MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L12-D2048-E0_1 79%[==============> ] 1.28G 85.1MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5r3-L12-D2048-E0_1- 81%[===============> ] 1.30G 85.5MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "r3-L12-D2048-E0_1-e 82%[===============> ] 1.32G 84.6MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "3-L12-D2048-E0_1-en 83%[===============> ] 1.34G 85.0MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-L12-D2048-E0_1-enw 85%[================> ] 1.37G 86.9MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "L12-D2048-E0_1-enwi 86%[================> ] 1.39G 92.5MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "12-D2048-E0_1-enwik 87%[================> ] 1.41G 94.5MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2-D2048-E0_1-enwiki 89%[================> ] 1.43G 96.2MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D2048-E0_1-enwiki- 90%[=================> ] 1.45G 99.5MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D2048-E0_1-enwiki-4 92%[=================> ] 1.47G 101MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + 
"2048-E0_1-enwiki-4k 93%[=================> ] 1.50G 104MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "048-E0_1-enwiki-4k. 94%[=================> ] 1.52G 105MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "48-E0_1-enwiki-4k.p 96%[==================> ] 1.54G 107MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "8-E0_1-enwiki-4k.pt 97%[==================> ] 1.56G 107MB/s eta 0s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-E0_1-enwiki-4k.pth 98%[==================> ] 1.58G 111MB/s eta 0s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L12-D2048-E0_1 100%[===================>] 1.60G 112MB/s in 15s \r\n", + "\r\n", + "2023-09-06 17:13:06 (106 MB/s) - ‘v5r3-L12-D2048-E0_1-enwiki-4k.pth’ saved [1721187013/1721187013]\r\n", + "\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 1.5G\r\n", + "drwxr-xr-x 2 root root 3 Sep 6 17:12 .\r\n", + "drwxr-xr-x 20 root root 24 Sep 6 17:12 ..\r\n", + "-rw-r--r-- 1 root root 1.7G Sep 6 15:04 v5r3-L12-D2048-E0_1-enwiki-4k.pth\r\n" + ] + } + ], + "source": [ + "# Download the model directly (stop gap till HF sync issues is resolved)\n", + "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n", + " wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/{DIR_NAME}/{FILENAME_PREFIX}-enwiki-4k.pth\"\n", + "\n", + "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n", + " ls -alh ." 
+ ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "326602ab", + "metadata": { + "papermill": { + "duration": 0.005225, + "end_time": "2023-09-06T17:13:06.914108", + "exception": false, + "start_time": "2023-09-06T17:13:06.908883", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Enwiki Stage 2 : Basic Instruct Tuning" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "e3aa35e9", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:13:06.925901Z", + "iopub.status.busy": "2023-09-06T17:13:06.925655Z", + "iopub.status.idle": "2023-09-06T17:13:14.291842Z", + "shell.execute_reply": "2023-09-06T17:13:14.291053Z" + }, + "papermill": { + "duration": 7.374402, + "end_time": "2023-09-06T17:13:14.293884", + "exception": false, + "start_time": "2023-09-06T17:13:06.919482", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Saving the dataset (0/1 shards): 0%| | 0/14932 [00:00=12.1), as this is known to have freeze issues\r\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\r\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\r\n", + "#\r\n", + "\r\n", + "[RWKV.model] Configuring optimizer with\r\n", + " - lr_init: 4.000e-04 (0.0004)\r\n", + " - lr_final: 3.000e-04 (0.0003)\r\n", + "\r\n", + "Using /root/.cache/torch_extensions/py310_cu118 as PyTorch extensions root...\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Detected CUDA files, patching ldflags\r\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu118/fused_adam/build.ninja...\r\n", + "Building extension module fused_adam...\r\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\r\n", + "ninja: no work to do.\r\n", + "Loading extension module fused_adam...\r\n", + "Time to load fused_adam op: 0.06091904640197754 seconds\r\n", + "Loading `train_dataloader` to estimate number of stepping batches.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Rank: 0 partition count [1, 1] and sizes[(860549120, False), (768, False)] \r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r\n", + " | Name | Type | Params\r\n", + "--------------------------------------\r\n", + "0 | emb | Embedding | 102 M \r\n", + "1 | blocks | ModuleList | 654 M \r\n", + "2 | ln_out | LayerNorm | 4.1 K \r\n", + "3 | head | Linear | 102 M \r\n", + "--------------------------------------\r\n", + "860 M Trainable params\r\n", + "0 Non-trainable params\r\n", + "860 M Total params\r\n", + "3,442.200 Total estimated model params size (MB)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Training: 0it [00:00, ?it/s]\r", + "Training: 0%| | 0/14932 [00:00\r\n", + " cli_main()\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 233, in cli_main\r\n", + " LightningCLI(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 353, in __init__\r\n", + " self._run_subcommand(self.subcommand)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 642, in _run_subcommand\r\n", + " fn(**fn_kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 529, in fit\r\n", + " call._call_and_handle_interrupt(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 41, in _call_and_handle_interrupt\r\n", + " return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)\r\n", + " File 
\"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/strategies/launchers/subprocess_script.py\", line 91, in launch\r\n", + " return function(*args, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 568, in _fit_impl\r\n", + " self._run(model, ckpt_path=ckpt_path)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 925, in _run\r\n", + " self._data_connector.prepare_data()\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py\", line 94, in prepare_data\r\n", + " call._call_lightning_datamodule_hook(trainer, \"prepare_data\")\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 164, in _call_lightning_datamodule_hook\r\n", + " return fn(*args, **kwargs)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 542, in prepare_data\r\n", + " prepare_data_static(**self._init_locals)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 101, in prepare_data_static\r\n", + " src_dataset = load_dataset(**load_dataset_params)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 2112, in load_dataset\r\n", + " builder_instance = load_dataset_builder(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 1798, in load_dataset_builder\r\n", + " dataset_module = dataset_module_factory(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 1413, in dataset_module_factory\r\n", + " ).get_module()\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 948, in get_module\r\n", + " patterns = sanitize_patterns(self.data_files) if self.data_files is not None else get_data_patterns(base_path)\r\n", + " File 
\"/usr/local/lib/python3.10/dist-packages/datasets/data_files.py\", line 459, in get_data_patterns\r\n", + " raise EmptyDatasetError(f\"The directory at {base_path} doesn't contain any data files\") from None\r\n", + "datasets.data_files.EmptyDatasetError: The directory at /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/dataset doesn't contain any data files\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5r3-L12-D2048-E0.1 - Mem-Instruct (train-ctx=512, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/jm2b2y5r\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjkzMjg5ODA3/version_details/v24\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230906_173654-jm2b2y5r/logs\u001b[0m\r\n" + ] + } + ], + "source": [ + "# Start the finetune model training\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{CONFIG_DIR}/config-mem-instruct.yaml\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Instruct (train-ctx=512, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-mem-instruct/\" \\\n", + " --model.load_model=\"../model/{FILENAME_PREFIX}-enwiki-instruct.pth\" \\\n", + " --model.ctx_len=512 \\\n", + " 
--model.bptt_learning_range=1" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "1aa1f08c", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:37:15.869868Z", + "iopub.status.busy": "2023-09-06T17:37:15.869574Z", + "iopub.status.idle": "2023-09-06T17:37:18.336322Z", + "shell.execute_reply": "2023-09-06T17:37:18.335570Z" + }, + "papermill": { + "duration": 3.092432, + "end_time": "2023-09-06T17:37:18.338136", + "exception": false, + "start_time": "2023-09-06T17:37:15.245704", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-09-06 17:37:17,473] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in \r\n", + " convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n", + " state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n", + " raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n", + "ValueError: Unable to find 'latest' file at ../checkpoint/v5r3-L12-D2048-E0_1-mem-instruct/last.ckpt/latest\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ls: cannot access '../model/v5r3-L12-D2048-E0_1-mem-instruct.pth': No such file or directory\r\n" + ] + } + ], + "source": [ + "# Lets export the model from the checkpoint\n", + "!cd 
\"{TRAINER_DIR}\" && \\\n", + " python3 export_checkpoint.py \\\n", + " \"../checkpoint/{FILENAME_PREFIX}-mem-instruct/last.ckpt\" \\\n", + " \"../model/{FILENAME_PREFIX}-mem-instruct.pth\" \"bf16\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-mem-instruct.pth\"" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "07518561", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:37:19.555489Z", + "iopub.status.busy": "2023-09-06T17:37:19.555305Z", + "iopub.status.idle": "2023-09-06T17:37:19.787874Z", + "shell.execute_reply": "2023-09-06T17:37:19.787184Z" + }, + "papermill": { + "duration": 0.823967, + "end_time": "2023-09-06T17:37:19.789695", + "exception": false, + "start_time": "2023-09-06T17:37:18.965728", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/eval_v5_memory_guided.py': [Errno 2] No such file or directory\r\n" + ] + } + ], + "source": [ + "# Lets do a quick memory test\n", + "!python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-instruct.pth\"" + ] + }, + { + "cell_type": "markdown", + "id": "42fec908", + "metadata": { + "papermill": { + "duration": 0.633567, + "end_time": "2023-09-06T17:37:21.058555", + "exception": false, + "start_time": "2023-09-06T17:37:20.424988", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Tune 2 : Low ctx size (512), memory training\n", + "\n", + "- Tune 2: Low ctx size (512), Training with instruction & input masked. This forces the actual memory training on the output tokens." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "577fea20", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:37:22.293582Z", + "iopub.status.busy": "2023-09-06T17:37:22.293253Z", + "iopub.status.idle": "2023-09-06T17:37:22.339491Z", + "shell.execute_reply": "2023-09-06T17:37:22.339111Z" + }, + "papermill": { + "duration": 0.679609, + "end_time": "2023-09-06T17:37:22.341670", + "exception": false, + "start_time": "2023-09-06T17:37:21.662061", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Generating word reptition dataset ##\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", 
+ "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file 
'/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file 
'/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file 
'/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Done ##\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 10K\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "drwxr-xr-x 2 root root 2 Sep 6 17:36 .\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "drwxr-xr-x 6 root root 11 Sep 6 17:36 ..\n" + ] + } + ], + "source": [ + "%%script bash\n", + "\n", + "########################################\n", + "# Generate the required jsonl dataset\n", + "########################################\n", + "\n", + "# Reset the dataset dir\n", + "mkdir -p ../dataset\n", + "rm -rf ../dataset/*.jsonl\n", + "\n", + "# Generate the various datasets\n", + "echo \"## Generating word reptition dataset ##\"\n", + "\n", + "#\n", + "# We switch over to fully masked instruct+input, to properly learn the memorization task\n", + "#\n", + "python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-2-count.jsonl 2 5000 &\n", + "for i in {5..95..5} \n", + "do\n", + " python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 5000 & \n", + "done\n", + "python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-100-count.jsonl 100 5000 &\n", + "python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-200-count.jsonl 200 5000 &\n", + "\n", + "#\n", + "# We mix in the shuffled word list, so that we ensure all words / tokens are learned\n", + "# however this might introduce an exclusion bias (if seen this word, never repeat it), \n", + "# so we limit the mixture of these data samples\n", + "#\n", + "python3
../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-10-count.jsonl 10 20 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-15-count.jsonl 15 20 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-25-count.jsonl 25 30 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-50-count.jsonl 50 50 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-75-count.jsonl 75 50 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-100-count.jsonl 100 50 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-200-count.jsonl 200 50 &\n", + "\n", + "wait\n", + "echo \"## Done ##\"\n", + "\n", + "ls -alh ../dataset/" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "5928163b", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:37:23.574211Z", + "iopub.status.busy": "2023-09-06T17:37:23.573930Z", + "iopub.status.idle": "2023-09-06T17:37:41.229665Z", + "shell.execute_reply": "2023-09-06T17:37:41.228859Z" + }, + "papermill": { + "duration": 18.288664, + "end_time": "2023-09-06T17:37:41.231690", + "exception": false, + "start_time": "2023-09-06T17:37:22.943026", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-09-06 17:37:26,449] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's 
args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L12-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L12-D2048-E0_1-mem-ctx-512/', '--model.lr_init=5e-4', '--model.lr_final=4e-4', '--data.max_token_size=512', '--model.ctx_len=512', '--model.bptt_learning_range=1', '--model.load_model=../model/v5r3-L12-D2048-E0_1-mem-instruct.pth'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L12-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L12-D2048-E0_1-mem-ctx-512/', '--model.lr_init=5e-4', '--model.lr_final=4e-4', '--data.max_token_size=512', '--model.ctx_len=512', '--model.bptt_learning_range=1', '--model.load_model=../model/v5r3-L12-D2048-E0_1-mem-instruct.pth'].\r\n", + " rank_zero_warn(\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 4258540337\r\n", + " rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n", + "Global seed set to 4258540337\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). 
Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.9\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20230906_173728-uklb27ld\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mv5r3-L12-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/uklb27ld\u001b[0m\r\n", + "Traceback (most recent call last):\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 254, in \r\n", + " cli_main()\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 233, in cli_main\r\n", + " LightningCLI(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 350, in __init__\r\n", + " self.instantiate_classes()\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 499, in instantiate_classes\r\n", + " self.config_init = self.parser.instantiate_classes(self.config)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n", + " cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1130, in instantiate_classes\r\n", + " cfg[subcommand] = subparser.instantiate_classes(cfg[subcommand], 
instantiate_groups=instantiate_groups)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n", + " cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1124, in instantiate_classes\r\n", + " component.instantiate_class(component, cfg)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_signatures.py\", line 561, in group_instantiate_class\r\n", + " parent[key] = group.group_class(**value)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 559, in __init__\r\n", + " raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n", + "ValueError: load_model file '../model/v5r3-L12-D2048-E0_1-mem-instruct.pth' does not exist\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5r3-L12-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/uklb27ld\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjkzMjg5ODA3/version_details/v25\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230906_173728-uklb27ld/logs\u001b[0m\r\n" + ] + } + ], + "source": [ + "# Start the finetune model training\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c 
\"{CONFIG_DIR}/config-mem-template.yaml\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Tune ctx-512 (train-ctx=512, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-mem-ctx-512/\" \\\n", + " --model.lr_init=5e-4 \\\n", + " --model.lr_final=4e-4 \\\n", + " --data.max_token_size=512 \\\n", + " --model.ctx_len=512 \\\n", + " --model.bptt_learning_range=1 \\\n", + " --model.load_model=\"../model/{FILENAME_PREFIX}-mem-instruct.pth\"" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "3c4e1a84", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:37:42.501242Z", + "iopub.status.busy": "2023-09-06T17:37:42.500756Z", + "iopub.status.idle": "2023-09-06T17:37:44.926179Z", + "shell.execute_reply": "2023-09-06T17:37:44.925416Z" + }, + "papermill": { + "duration": 3.062059, + "end_time": "2023-09-06T17:37:44.927885", + "exception": false, + "start_time": "2023-09-06T17:37:41.865826", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-09-06 17:37:44,062] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in \r\n", + " convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n", + " state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n", + " File 
\"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n", + " raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n", + "ValueError: Unable to find 'latest' file at ../checkpoint/v5r3-L12-D2048-E0_1-mem-ctx-512/last.ckpt/latest\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ls: cannot access '../model/v5r3-L12-D2048-E0_1-mem-ctx-512.pth': No such file or directory\r\n" + ] + } + ], + "source": [ + "# Lets export the model from the checkpoint\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 export_checkpoint.py \\\n", + " \"../checkpoint/{FILENAME_PREFIX}-mem-ctx-512/last.ckpt\" \\\n", + " \"../model/{FILENAME_PREFIX}-mem-ctx-512.pth\" \"bf16\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-mem-ctx-512.pth\"" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "ff1e2d52", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:37:46.169833Z", + "iopub.status.busy": "2023-09-06T17:37:46.169341Z", + "iopub.status.idle": "2023-09-06T17:37:46.403204Z", + "shell.execute_reply": "2023-09-06T17:37:46.402355Z" + }, + "papermill": { + "duration": 0.870871, + "end_time": "2023-09-06T17:37:46.404923", + "exception": false, + "start_time": "2023-09-06T17:37:45.534052", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/eval_v5_memory_guided.py': [Errno 2] No such file or directory\r\n" + ] + } + ], + "source": [ + "# Lets do a quick memory test\n", + "!python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-512.pth\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 
3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "papermill": { + "default_parameters": {}, + "duration": 1500.445098, + "end_time": "2023-09-06T17:37:47.161793", + "environment_variables": {}, + "exception": null, + "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb", + "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb", + "parameters": {}, + "start_time": "2023-09-06T17:12:46.716695", + "version": "2.4.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file