diff --git "a/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb" "b/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb" --- "a/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb" +++ "b/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb" @@ -1,3 +1,140296 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0db5673fbf59261a65cafb957510a87538f738b6cebffd10ed532db38dfdcb01 -size 53132732 +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "fb8bab66", + "metadata": { + "papermill": { + "duration": 0.004984, + "end_time": "2023-09-06T18:15:07.813560", + "exception": false, + "start_time": "2023-09-06T18:15:07.808576", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# RWKV v5\n", + "\n", + "Simple memory training for a small model\n", + "\n", + "**Note:** This project assumes you have the rwkv-infctx conda env setup" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "644a18dd", + "metadata": { + "papermill": { + "duration": 0.002407, + "end_time": "2023-09-06T18:15:07.820470", + "exception": false, + "start_time": "2023-09-06T18:15:07.818063", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Basic Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "88954417", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T18:15:07.826313Z", + "iopub.status.busy": "2023-09-06T18:15:07.826023Z", + "iopub.status.idle": "2023-09-06T18:15:08.710191Z", + "shell.execute_reply": "2023-09-06T18:15:08.709357Z" + }, + "papermill": { + "duration": 0.889532, + "end_time": "2023-09-06T18:15:08.712219", + "exception": false, + "start_time": "2023-09-06T18:15:07.822687", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CITATION.cff RWKV-v4wavenet\t RWKV-v5headsize2x checkpoint\tnotebook\r\n", + "LICENSE RWKV-v5\t\t 
RWKV-v5headsize32 datapath\toutput\r\n", + "README.md RWKV-v5-beta2\t RWKV-v5rstack\t docker\r\n", + "RWKV-v4neo RWKV-v5altwavenet RWKV-v5wavenet model\r\n" + ] + } + ], + "source": [ + "# First lets setup the various directories, and init the model\n", + "!ls ../../../../../\n", + "!mkdir -p ../../../../../model/\n", + "!mkdir -p ../../../../../datapath/\n", + "!mkdir -p ../../../../../checkpoint/" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "54728414", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T18:15:08.723120Z", + "iopub.status.busy": "2023-09-06T18:15:08.722681Z", + "iopub.status.idle": "2023-09-06T18:15:10.844392Z", + "shell.execute_reply": "2023-09-06T18:15:10.843613Z" + }, + "papermill": { + "duration": 2.129186, + "end_time": "2023-09-06T18:15:10.846223", + "exception": false, + "start_time": "2023-09-06T18:15:08.717037", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\r\n", + "\u001b[0m" + ] + } + ], + "source": [ + "# Additional dependencies for eval stuff\n", + "!pip install -q aiocsv aiofiles" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "4e5c05c9", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T18:15:10.856132Z", + "iopub.status.busy": "2023-09-06T18:15:10.855690Z", + "iopub.status.idle": "2023-09-06T18:15:10.864690Z", + "shell.execute_reply": "2023-09-06T18:15:10.864101Z" + }, + "papermill": { + "duration": 0.015147, + "end_time": "2023-09-06T18:15:10.866187", + "exception": false, + "start_time": "2023-09-06T18:15:10.851040", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DEEPSPEED_STRAT:" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " deepspeed_stage_1\n", + "ENABLE_WANDB: True\n", + "GPU_DEVICES: auto\n", + "DIR_NAME: L12-D2048-E1e-1-ctx4k\n", + "NOTEBOOK_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k\n", + "INFERENCE_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", + "TRAINER_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", + "PROJECT_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer\n" + ] + } + ], + "source": [ + "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n", + "GPU_DEVICES=\"auto\"\n", + "ENABLE_WANDB=True\n", + "\n", + "# Layer count and embed dim to start with\n", + "LAYER_COUNT=12\n", + "EMBED_DIM=2048\n", + "\n", + "EMBED_SCALE=0.1\n", + "EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(\".\", \"_\")\n", + "\n", + "WANDB_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE}\"\n", + "FILENAME_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE_LABEL}\"\n", + "\n", + "print(\"DEEPSPEED_STRAT:\", 
DEEPSPEED_STRAT)\n", + "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", + "print(\"GPU_DEVICES:\", GPU_DEVICES)\n", + "\n", + "if ENABLE_WANDB:\n", + " WANDB_MODE=\"online\"\n", + "else:\n", + " WANDB_MODE=\"disabled\"\n", + "\n", + "# Computing the notebook, and various paths\n", + "import os\n", + "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", + "CONFIG_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../\"))\n", + "PROJECT_DIR=os.path.abspath(os.path.join(CONFIG_DIR, \"../../../../\"))\n", + "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "INFERENCE_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "\n", + "# Get the notebook dir name\n", + "DIR_NAME=os.path.basename(NOTEBOOK_DIR)\n", + "\n", + "# Log names and dir\n", + "print(\"DIR_NAME:\", DIR_NAME)\n", + "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", + "print(\"INFERENCE_DIR:\", INFERENCE_DIR)\n", + "print(\"TRAINER_DIR:\", TRAINER_DIR)\n", + "print(\"PROJECT_DIR:\", PROJECT_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9a735016", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T18:15:10.875858Z", + "iopub.status.busy": "2023-09-06T18:15:10.875465Z", + "iopub.status.idle": "2023-09-06T18:15:26.773342Z", + "shell.execute_reply": "2023-09-06T18:15:26.772573Z" + }, + "papermill": { + "duration": 15.904517, + "end_time": "2023-09-06T18:15:26.775109", + "exception": false, + "start_time": "2023-09-06T18:15:10.870592", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2023-09-06 18:15:10-- https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-enwiki-4k.pth\r\n", + "Resolving huggingface.co (huggingface.co)... 13.33.33.110, 13.33.33.20, 13.33.33.55, ...\r\n", + "Connecting to huggingface.co (huggingface.co)|13.33.33.110|:443... 
connected.\r\n", + "HTTP request sent, awaiting response... " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "302 Found\r\n", + "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/fcd2c54e435c74dc2a43bd3bbde6594de9c6937156caf9f72a77137ed3d49539?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L12-D2048-E0_1-enwiki-4k.pth%3B+filename%3D%22v5r3-L12-D2048-E0_1-enwiki-4k.pth%22%3B&Expires=1694283311&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDI4MzMxMX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkL2ZjZDJjNTRlNDM1Yzc0ZGMyYTQzYmQzYmJkZTY1OTRkZTljNjkzNzE1NmNhZjlmNzJhNzcxMzdlZDNkNDk1Mzk%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=umgcHAKKVfM%7Elnvkc01zRnwcLHn3wrQfHfl-f-B8cnLRauI-kG63DbJ3BWLUFwsnUXKFrGoEYt8IW3AjE2J9QanT4tt1Zh34ojm5pdkTt4PvrIoX0iCwHRRIJGgV9h%7EF%7EMpsuweAJiHAbk61U4GSdt3fnpVaAKUKKa-VNDcmS3LwTuOx3gQgTqbTc-9ZMz14QcAVZV%7EgGZO5D1Owr0g0db9eatciOvhG7%7EnN%7ES%7EQIhVVZENXFPv0Ej8Jr11N0lmHdGU%7EBXm0fqUL1lCSaAEwCoIF%7EGrG2gtP049PGM9tapGgm6-4y4HbfsNVIxH-iRn-c2lvkjCpUSIOpMzce6wNvA__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n", + "--2023-09-06 18:15:11-- 
https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/fcd2c54e435c74dc2a43bd3bbde6594de9c6937156caf9f72a77137ed3d49539?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L12-D2048-E0_1-enwiki-4k.pth%3B+filename%3D%22v5r3-L12-D2048-E0_1-enwiki-4k.pth%22%3B&Expires=1694283311&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDI4MzMxMX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkL2ZjZDJjNTRlNDM1Yzc0ZGMyYTQzYmQzYmJkZTY1OTRkZTljNjkzNzE1NmNhZjlmNzJhNzcxMzdlZDNkNDk1Mzk%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=umgcHAKKVfM%7Elnvkc01zRnwcLHn3wrQfHfl-f-B8cnLRauI-kG63DbJ3BWLUFwsnUXKFrGoEYt8IW3AjE2J9QanT4tt1Zh34ojm5pdkTt4PvrIoX0iCwHRRIJGgV9h%7EF%7EMpsuweAJiHAbk61U4GSdt3fnpVaAKUKKa-VNDcmS3LwTuOx3gQgTqbTc-9ZMz14QcAVZV%7EgGZO5D1Owr0g0db9eatciOvhG7%7EnN%7ES%7EQIhVVZENXFPv0Ej8Jr11N0lmHdGU%7EBXm0fqUL1lCSaAEwCoIF%7EGrG2gtP049PGM9tapGgm6-4y4HbfsNVIxH-iRn-c2lvkjCpUSIOpMzce6wNvA__&Key-Pair-Id=KVTP0A1DKRTAX\r\n", + "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18.155.68.128, 18.155.68.73, 18.155.68.98, ...\r\n", + "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|18.155.68.128|:443... connected.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "HTTP request sent, awaiting response... 
200 OK\r\n", + "Length: 1721187013 (1.6G) [binary/octet-stream]\r\n", + "Saving to: ‘v5r3-L12-D2048-E0_1-enwiki-4k.pth’\r\n", + "\r\n", + "\r", + " v5r3-L12- 0%[ ] 0 --.-KB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D 1%[ ] 21.14M 106MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2 2%[ ] 43.53M 109MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D20 4%[ ] 65.91M 110MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D204 5%[> ] 88.31M 110MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048 6%[> ] 110.72M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048- 8%[> ] 133.11M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048-E 9%[> ] 155.46M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048-E0 10%[=> ] 177.86M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048-E0_ 12%[=> ] 200.27M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L12-D2048-E0_1 13%[=> ] 222.66M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5r3-L12-D2048-E0_1- 14%[=> ] 245.07M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "r3-L12-D2048-E0_1-e 16%[==> ] 267.48M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "3-L12-D2048-E0_1-en 17%[==> ] 289.89M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-L12-D2048-E0_1-enw 19%[==> ] 312.30M 112MB/s " + ] + }, + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "\r", + "L12-D2048-E0_1-enwi 20%[===> ] 334.71M 112MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "12-D2048-E0_1-enwik 21%[===> ] 357.11M 112MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2-D2048-E0_1-enwiki 23%[===> ] 379.51M 112MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D2048-E0_1-enwiki- 24%[===> ] 401.92M 112MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D2048-E0_1-enwiki-4 25%[====> ] 424.32M 112MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2048-E0_1-enwiki-4k 27%[====> ] 446.72M 112MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "048-E0_1-enwiki-4k. 28%[====> ] 469.12M 112MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "48-E0_1-enwiki-4k.p 29%[====> ] 491.52M 112MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "8-E0_1-enwiki-4k.pt 31%[=====> ] 513.93M 112MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-E0_1-enwiki-4k.pth 32%[=====> ] 536.33M 112MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "E0_1-enwiki-4k.pth 34%[=====> ] 558.73M 112MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0_1-enwiki-4k.pth 35%[======> ] 581.14M 112MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "_1-enwiki-4k.pth 36%[======> ] 602.65M 112MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "1-enwiki-4k.pth 38%[======> ] 624.87M 112MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", 
+ "-enwiki-4k.pth 39%[======> ] 647.26M 112MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "enwiki-4k.pth 40%[=======> ] 669.68M 112MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "nwiki-4k.pth 42%[=======> ] 692.08M 112MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "wiki-4k.pth 43%[=======> ] 714.45M 112MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "iki-4k.pth 44%[=======> ] 736.88M 112MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "ki-4k.pth 46%[========> ] 759.32M 112MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "i-4k.pth 47%[========> ] 781.71M 112MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-4k.pth 48%[========> ] 804.12M 112MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "4k.pth 50%[=========> ] 826.52M 112MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "k.pth 51%[=========> ] 848.92M 112MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + ".pth 53%[=========> ] 871.34M 112MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "pth 54%[=========> ] 893.74M 112MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "th 55%[==========> ] 916.15M 112MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "h 57%[==========> ] 938.55M 112MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " 58%[==========> ] 960.96M 112MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v 
59%[==========> ] 983.35M 112MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5 61%[===========> ] 1006M 112MB/s eta 6s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r 62%[===========> ] 1.00G 112MB/s eta 6s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3 64%[===========> ] 1.03G 112MB/s eta 6s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3- 65%[============> ] 1.05G 112MB/s eta 6s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L 66%[============> ] 1.07G 112MB/s eta 6s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L1 68%[============> ] 1.09G 112MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12 68%[============> ] 1.09G 105MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12- 69%[============> ] 1.11G 104MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D 70%[=============> ] 1.13G 105MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2 72%[=============> ] 1.16G 105MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D20 73%[=============> ] 1.18G 104MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D204 74%[=============> ] 1.20G 105MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048 76%[==============> ] 1.22G 105MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048- 77%[==============> ] 1.24G 104MB/s eta 4s " + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048-E 79%[==============> ] 1.27G 105MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048-E0 80%[===============> ] 1.29G 105MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048-E0_ 81%[===============> ] 1.31G 104MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L12-D2048-E0_1 83%[===============> ] 1.33G 105MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5r3-L12-D2048-E0_1- 84%[===============> ] 1.35G 105MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "r3-L12-D2048-E0_1-e 85%[================> ] 1.38G 104MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "3-L12-D2048-E0_1-en 87%[================> ] 1.40G 103MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-L12-D2048-E0_1-enw 88%[================> ] 1.42G 110MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "L12-D2048-E0_1-enwi 89%[================> ] 1.44G 111MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "12-D2048-E0_1-enwik 91%[=================> ] 1.46G 111MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2-D2048-E0_1-enwiki 92%[=================> ] 1.48G 110MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D2048-E0_1-enwiki- 93%[=================> ] 1.50G 110MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D2048-E0_1-enwiki-4 95%[==================> ] 1.52G 110MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + 
"2048-E0_1-enwiki-4k 96%[==================> ] 1.55G 110MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "048-E0_1-enwiki-4k. 97%[==================> ] 1.57G 110MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "48-E0_1-enwiki-4k.p 99%[==================> ] 1.59G 110MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L12-D2048-E0_1 100%[===================>] 1.60G 110MB/s in 15s \r\n", + "\r\n", + "2023-09-06 18:15:26 (110 MB/s) - ‘v5r3-L12-D2048-E0_1-enwiki-4k.pth’ saved [1721187013/1721187013]\r\n", + "\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 1.6G\r\n", + "drwxr-xr-x 2 root root 3 Sep 6 18:15 .\r\n", + "drwxr-xr-x 20 root root 24 Sep 6 18:15 ..\r\n", + "-rw-r--r-- 1 root root 1.7G Sep 6 15:04 v5r3-L12-D2048-E0_1-enwiki-4k.pth\r\n" + ] + } + ], + "source": [ + "# Download the model directly (stop gap till HF sync issues is resolved)\n", + "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n", + " wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/{DIR_NAME}/{FILENAME_PREFIX}-enwiki-4k.pth\"\n", + "\n", + "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n", + " ls -alh ." 
+ ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2b6d6b9e", + "metadata": { + "papermill": { + "duration": 0.005279, + "end_time": "2023-09-06T18:15:26.791307", + "exception": false, + "start_time": "2023-09-06T18:15:26.786028", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Enwiki Stage 2 : Basic Instruct Tuning" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ec611ca0", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T18:15:26.803278Z", + "iopub.status.busy": "2023-09-06T18:15:26.803020Z", + "iopub.status.idle": "2023-09-06T18:15:34.152162Z", + "shell.execute_reply": "2023-09-06T18:15:34.151364Z" + }, + "papermill": { + "duration": 7.357428, + "end_time": "2023-09-06T18:15:34.154153", + "exception": false, + "start_time": "2023-09-06T18:15:26.796725", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Saving the dataset (0/1 shards): 0%| | 0/14932 [00:00=12.1), as this is known to have freeze issues\r\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\r\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\r\n", + "#\r\n", + "\r\n", + "[RWKV.model] Configuring optimizer with\r\n", + " - lr_init: 4.000e-04 (0.0004)\r\n", + " - lr_final: 3.000e-04 (0.0003)\r\n", + "\r\n", + "Using /root/.cache/torch_extensions/py310_cu118 as PyTorch extensions root...\r\n", + "Detected CUDA files, patching ldflags\r\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu118/fused_adam/build.ninja...\r\n", + "Building extension module fused_adam...\r\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ninja: no work to do.\r\n", + "Loading extension module fused_adam...\r\n", + "Time to load fused_adam op: 0.059059858322143555 seconds\r\n", + "Loading `train_dataloader` to estimate number of stepping batches.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Rank: 0 partition count [1, 1] and sizes[(860549120, False), (768, False)] \r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r\n", + " | Name | Type | Params\r\n", + "--------------------------------------\r\n", + "0 | emb | Embedding | 102 M \r\n", + "1 | blocks | ModuleList | 654 M \r\n", + "2 | ln_out | LayerNorm | 4.1 K \r\n", + "3 | head | Linear | 102 M \r\n", + "--------------------------------------\r\n", + "860 M Trainable params\r\n", + "0 Non-trainable params\r\n", + "860 M Total params\r\n", + "3,442.200 Total estimated model params size (MB)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Training: 0it [00:00, ?it/s]\r", + "Training: 0%| | 0/14932 [00:00\r\n", + " cli_main()\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 233, in cli_main\r\n", + " LightningCLI(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 353, in __init__\r\n", + " self._run_subcommand(self.subcommand)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 642, in _run_subcommand\r\n", + " fn(**fn_kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 529, in fit\r\n", + " call._call_and_handle_interrupt(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 41, in _call_and_handle_interrupt\r\n", + " return 
trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/strategies/launchers/subprocess_script.py\", line 91, in launch\r\n", + " return function(*args, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 568, in _fit_impl\r\n", + " self._run(model, ckpt_path=ckpt_path)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 925, in _run\r\n", + " self._data_connector.prepare_data()\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py\", line 94, in prepare_data\r\n", + " call._call_lightning_datamodule_hook(trainer, \"prepare_data\")\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 164, in _call_lightning_datamodule_hook\r\n", + " return fn(*args, **kwargs)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 542, in prepare_data\r\n", + " prepare_data_static(**self._init_locals)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 101, in prepare_data_static\r\n", + " src_dataset = load_dataset(**load_dataset_params)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 2112, in load_dataset\r\n", + " builder_instance = load_dataset_builder(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 1798, in load_dataset_builder\r\n", + " dataset_module = dataset_module_factory(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 1413, in dataset_module_factory\r\n", + " ).get_module()\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 948, in get_module\r\n", + " patterns = sanitize_patterns(self.data_files) if self.data_files is not None 
else get_data_patterns(base_path)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/data_files.py\", line 459, in get_data_patterns\r\n", + " raise EmptyDatasetError(f\"The directory at {base_path} doesn't contain any data files\") from None\r\n", + "datasets.data_files.EmptyDatasetError: The directory at /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/dataset doesn't contain any data files\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5r3-L12-D2048-E0.1 - Mem-Instruct (train-ctx=512, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/t4ttsqx6\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjkzMjg5ODA3/version_details/v31\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230906_183907-t4ttsqx6/logs\u001b[0m\r\n" + ] + } + ], + "source": [ + "# Start the finetune model training\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{CONFIG_DIR}/config-mem-instruct.yaml\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Instruct (train-ctx=512, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-mem-instruct/\" \\\n", + " --model.load_model=\"../model/{FILENAME_PREFIX}-enwiki-instruct.pth\" 
\\\n", + " --model.ctx_len=512 \\\n", + " --model.bptt_learning_range=1" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "d08eb257", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T18:39:26.893633Z", + "iopub.status.busy": "2023-09-06T18:39:26.893337Z", + "iopub.status.idle": "2023-09-06T18:39:29.382332Z", + "shell.execute_reply": "2023-09-06T18:39:29.381567Z" + }, + "papermill": { + "duration": 3.132835, + "end_time": "2023-09-06T18:39:29.384061", + "exception": false, + "start_time": "2023-09-06T18:39:26.251226", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-09-06 18:39:28,513] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in \r\n", + " convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n", + " state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n", + " raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n", + "ValueError: Unable to find 'latest' file at ../checkpoint/v5r3-L12-D2048-E0_1-mem-instruct/last.ckpt/latest\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ls: cannot access '../model/v5r3-L12-D2048-E0_1-mem-instruct.pth': No such file or directory\r\n" + ] + } + ], + "source": [ + "# Lets export 
the model from the checkpoint\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 export_checkpoint.py \\\n", + " \"../checkpoint/{FILENAME_PREFIX}-mem-instruct/last.ckpt\" \\\n", + " \"../model/{FILENAME_PREFIX}-mem-instruct.pth\" \"bf16\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-mem-instruct.pth\"" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "39060592", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T18:39:30.624854Z", + "iopub.status.busy": "2023-09-06T18:39:30.624554Z", + "iopub.status.idle": "2023-09-06T18:39:30.862069Z", + "shell.execute_reply": "2023-09-06T18:39:30.861310Z" + }, + "papermill": { + "duration": 0.873983, + "end_time": "2023-09-06T18:39:30.863701", + "exception": false, + "start_time": "2023-09-06T18:39:29.989718", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/eval_v5_memory_guided.py': [Errno 2] No such file or directory\r\n" + ] + } + ], + "source": [ + "# Lets do a quick memory test\n", + "!python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-instruct.pth\"" + ] + }, + { + "cell_type": "markdown", + "id": "9ed8fd21", + "metadata": { + "papermill": { + "duration": 0.601874, + "end_time": "2023-09-06T18:39:32.106539", + "exception": false, + "start_time": "2023-09-06T18:39:31.504665", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Tune 2 : Low ctx size (512), memory training\n", + "\n", + "- Tune 2: Low ctx size (512), Training with instruction & input masked. This forces the actual memory training on the output tokens." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "04256ca3", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T18:39:33.385937Z", + "iopub.status.busy": "2023-09-06T18:39:33.385635Z", + "iopub.status.idle": "2023-09-06T18:39:33.431947Z", + "shell.execute_reply": "2023-09-06T18:39:33.431311Z" + }, + "papermill": { + "duration": 0.688742, + "end_time": "2023-09-06T18:39:33.433721", + "exception": false, + "start_time": "2023-09-06T18:39:32.744979", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "bash: line 7: cd: {CONFIG_DIR}: No such file or directory\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Generating word reptition dataset ##\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file 
'/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file 
'/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file 
'/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file 
'/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file 
'/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Done ##\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 10K\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "drwxr-xr-x 2 root root 2 Sep 6 18:39 .\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "drwxr-xr-x 6 root root 11 Sep 6 18:39 ..\n" + ] + } + ], + "source": [ + "%%script bash -s \"{CONFIG_DIR}\"\n", + "\n", + "########################################\n", + "# Generate the required jsonl dataset\n", + "########################################\n", + "\n", + "# Go to config dir. IPython does not expand {CONFIG_DIR} inside a %%script body,\n", + "# so it is passed on the magic line (which is expanded) and arrives here as $1.\n", + "# Abort if the cd fails, otherwise the rm below would run in the wrong directory.\n", + "cd \"$1\" || exit 1\n", + "\n", + "# Reset the dataset dir\n", + "mkdir -p ../dataset\n", + "rm -rf ../dataset/*.jsonl\n", + "\n", + "# Generate the various datasets\n", + "echo \"## Generating word reptition dataset ##\"\n", + "\n", + "#\n", + "# We switch over to fully masked instruct+input, to properly learn the memorization task\n", + "#\n", + "python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-2-count.jsonl 2 5000 &\n", + "for i in {5..95..5} \n", + "do\n", + " python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 5000 & \n", + "done\n", + "python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-100-count.jsonl 100 5000 &\n", + "python3 
../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-200-count.jsonl 200 5000 &\n", + "\n", + "#\n", + "# We mix in the shuffled word list, so that we ensure all words / tokens are learned\n", + "# however this might introduce an exclusion bias (if seen this word, never repeat it), \n", + "# so we limit the mixture of these data samples\n", + "#\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-10-count.jsonl 10 20 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-15-count.jsonl 15 20 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-25-count.jsonl 25 30 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-50-count.jsonl 50 50 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-75-count.jsonl 75 50 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-100-count.jsonl 100 50 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-200-count.jsonl 200 50 &\n", + "\n", + "wait\n", + "echo \"## Done ##\"\n", + "\n", + "ls -alh ../dataset/" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "1fc0cef1", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T18:39:34.676127Z", + "iopub.status.busy": "2023-09-06T18:39:34.675939Z", + "iopub.status.idle": "2023-09-06T18:39:53.301947Z", + "shell.execute_reply": "2023-09-06T18:39:53.301048Z" + }, + "papermill": { + "duration": 19.230409, + "end_time": "2023-09-06T18:39:53.303981", + "exception": false, + "start_time": "2023-09-06T18:39:34.073572", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-09-06 18:39:37,542] [INFO] [real_accelerator.py:133:get_accelerator] 
Setting ds_accelerator to cuda (auto detect)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L12-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L12-D2048-E0_1-mem-ctx-512/', '--model.lr_init=5e-4', '--model.lr_final=4e-4', '--data.max_token_size=512', '--model.ctx_len=512', '--model.bptt_learning_range=1', '--model.load_model=../model/v5r3-L12-D2048-E0_1-mem-instruct.pth'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L12-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L12-D2048-E0_1-mem-ctx-512/', '--model.lr_init=5e-4', '--model.lr_final=4e-4', '--data.max_token_size=512', '--model.ctx_len=512', '--model.bptt_learning_range=1', '--model.load_model=../model/v5r3-L12-D2048-E0_1-mem-instruct.pth'].\r\n", + " rank_zero_warn(\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:39: 
UserWarning: No seed found, seed set to 2825309834\r\n", + " rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n", + "Global seed set to 2825309834\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.9\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20230906_183940-f9fnknh5\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mv5r3-L12-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/f9fnknh5\u001b[0m\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 254, in \r\n", + " cli_main()\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 233, in cli_main\r\n", + " LightningCLI(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 350, in __init__\r\n", + " self.instantiate_classes()\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 499, in instantiate_classes\r\n", + " self.config_init = 
self.parser.instantiate_classes(self.config)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n", + " cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1130, in instantiate_classes\r\n", + " cfg[subcommand] = subparser.instantiate_classes(cfg[subcommand], instantiate_groups=instantiate_groups)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n", + " cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1124, in instantiate_classes\r\n", + " component.instantiate_class(component, cfg)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_signatures.py\", line 561, in group_instantiate_class\r\n", + " parent[key] = group.group_class(**value)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 559, in __init__\r\n", + " raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n", + "ValueError: load_model file '../model/v5r3-L12-D2048-E0_1-mem-instruct.pth' does not exist\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... 
\u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5r3-L12-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/f9fnknh5\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjkzMjg5ODA3/version_details/v32\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230906_183940-f9fnknh5/logs\u001b[0m\r\n" + ] + } + ], + "source": [ + "# Start the finetune model training\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{CONFIG_DIR}/config-mem-template.yaml\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Tune ctx-512 (train-ctx=512, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-mem-ctx-512/\" \\\n", + " --model.lr_init=5e-4 \\\n", + " --model.lr_final=4e-4 \\\n", + " --data.max_token_size=512 \\\n", + " --model.ctx_len=512 \\\n", + " --model.bptt_learning_range=1 \\\n", + " --model.load_model=\"../model/{FILENAME_PREFIX}-mem-instruct.pth\"" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "f9b70eea", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T18:39:54.579000Z", + "iopub.status.busy": "2023-09-06T18:39:54.578746Z", + "iopub.status.idle": "2023-09-06T18:39:57.009190Z", + "shell.execute_reply": "2023-09-06T18:39:57.008417Z" + }, + "papermill": { + 
"duration": 3.068353, + "end_time": "2023-09-06T18:39:57.010860", + "exception": false, + "start_time": "2023-09-06T18:39:53.942507", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-09-06 18:39:56,146] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in \r\n", + " convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n", + " state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n", + " raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n", + "ValueError: Unable to find 'latest' file at ../checkpoint/v5r3-L12-D2048-E0_1-mem-ctx-512/last.ckpt/latest\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ls: cannot access '../model/v5r3-L12-D2048-E0_1-mem-ctx-512.pth': No such file or directory\r\n" + ] + } + ], + "source": [ + "# Lets export the model from the checkpoint\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 export_checkpoint.py \\\n", + " \"../checkpoint/{FILENAME_PREFIX}-mem-ctx-512/last.ckpt\" \\\n", + " \"../model/{FILENAME_PREFIX}-mem-ctx-512.pth\" \"bf16\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-mem-ctx-512.pth\"" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "01fca15c", + "metadata": { + 
"execution": { + "iopub.execute_input": "2023-09-06T18:39:58.256313Z", + "iopub.status.busy": "2023-09-06T18:39:58.256022Z", + "iopub.status.idle": "2023-09-06T18:39:58.488872Z", + "shell.execute_reply": "2023-09-06T18:39:58.488054Z" + }, + "papermill": { + "duration": 0.874819, + "end_time": "2023-09-06T18:39:58.490725", + "exception": false, + "start_time": "2023-09-06T18:39:57.615906", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/eval_v5_memory_guided.py': [Errno 2] No such file or directory\r\n" + ] + } + ], + "source": [ + "# Lets do a quick memory test\n", + "!python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-512.pth\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "papermill": { + "default_parameters": {}, + "duration": 1492.297607, + "end_time": "2023-09-06T18:39:59.213287", + "environment_variables": {}, + "exception": null, + "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb", + "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb", + "parameters": {}, + "start_time": "2023-09-06T18:15:06.915680", + "version": "2.4.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file