diff --git a/.gitattributes b/.gitattributes index b6d19782bcbc7fe236d0826bfe059c5aa9f6ba2a..6bfafb4b9e0a2d2de0cd20d789c29ea71d2ec8b3 100644 --- a/.gitattributes +++ b/.gitattributes @@ -87,21 +87,3 @@ experiment/memory-bench/logs/v5-L6-D1024-E0_1-16k.csv filter=lfs diff=lfs merge= experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part1.ipynb filter=lfs diff=lfs merge=lfs -text experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/part1.ipynb filter=lfs diff=lfs merge=lfs -text experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/part2.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/part1.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage2.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage2.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage4.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage4.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage5.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage5.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-baseline.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-part1.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-baseline.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-overwrite-naive.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-train.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-expansion.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-baseline.ipynb filter=lfs diff=lfs merge=lfs -text diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-baseline-p2.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-baseline-p2.pth deleted file mode 100644 index df9e649b821c97a2e5912e7f73555d6ed6cb4133..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-baseline-p2.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c447bfd1844d0c3e536fb8824d029fd8b0e334e1368f807a4e85cd7099005130 -size 1721187285 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-baseline-p3.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-baseline-p3.pth deleted file mode 100644 index 7e510b454e51f3e1360458d4aa4bc8f97480caba..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-baseline-p3.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:70b243f59685c4df841f16343bc7ff6947a3125cec5dabf9035b28b65c04da0e -size 1721187285 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-enwiki-4k-p1.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-enwiki-4k-p1.pth deleted file mode 100644 index af865daa0e29c34250db338e077056e58b7bba5e..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-enwiki-4k-p1.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a57c278ed7e7e2f9d7f0436540674bfa5178adcd04c3154f5d92992e0602c55b -size 1721187621 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-neox-v5base-init.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-neox-v5base-init.pth deleted file mode 100644 index 613ae00e6b94d2f54a518f1072636f21e570c85e..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-neox-v5base-init.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:06105d96413046fce0ec189b9c4685a813cfa7147300851c5d2afc7b5adbcb38 -size 1721189797 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-baseline.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-baseline.ipynb deleted file mode 100644 index 04f75fe75d5a993aa7050629019cd0e3cf72c508..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-baseline.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:31f61ce42e82d9a475446458ed015a190f16dd9b2b17bd67f4feedd9f72750ad -size 16577145 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-part1.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-part1.ipynb deleted file mode 100644 index 2caac2060cc81dbfc7e4840004960eeed06d0e29..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-part1.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b53c27ed2c20b9f1f690647a83c0fbe2ce09594518b9ec557f515a4f8b548f2b -size 15941299 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-baseline-p3.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-baseline-p3.pth deleted file mode 100644 index 2f736f75a1664aecb04c1e0fe217b71a77aecbf5..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-baseline-p3.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c380bcd4b861a8af263fd56dc6e183b9e06ba0bc8f9895c4dcd8a678b58296e8 -size 1721187621 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-layer-expansion-p1.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-layer-expansion-p1.pth deleted file mode 100644 index 7f96f25f0aac70ea5a9c88a5208d0c071bd9fee9..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-layer-expansion-p1.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:89f8caf661887bdba1897a10009f033331c552bfb763112e6da1b850d8ec3ff7 -size 1721189525 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-layer-expansion-p2.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-layer-expansion-p2.pth deleted file mode 100644 index 15f17b01a2826359ce6ac3f3bea9b310b2b596e3..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-layer-expansion-p2.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2680e091197e798686c97bdd2af0f6827f2b29c648cc1ae03f67d6f094859618 -size 1721189525 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-overwrite-naive-p1.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-overwrite-naive-p1.pth deleted file mode 100644 index 8bb49e3d9132afcd95cecfa46932131d2971c1e2..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-overwrite-naive-p1.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:35a5d7571d90160edc20ce95abfdbcb6109ad47eccdefe8051bd8f15d12bf326 -size 1721189525 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-overwrite-naive-p2.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-overwrite-naive-p2.pth deleted file mode 100644 index ac0f774b491b1af1dec3e65871dcb2618a295104..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-overwrite-naive-p2.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f6b50bf05f191da87a6a17072d485d4059a4ded1335605e6b7bb8e9f2648d966 -size 1721189525 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb deleted file mode 100644 index ae3b85f4a8acf6c3001f445ade22f015a8d52327..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb +++ /dev/null @@ -1,2461 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "d3126ef2", - "metadata": { - "papermill": { - "duration": 0.004879, - "end_time": "2023-10-11T08:02:23.608034", - "exception": false, - "start_time": "2023-10-11T08:02:23.603155", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "# RWKV v5 multi-size training experiment\n", - "\n", - "**Note:** This project assumes you have the rwkv-infctx conda env setup" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "986070aa", - "metadata": { - "papermill": { - "duration": 0.002523, - "end_time": "2023-10-11T08:02:23.613605", - "exception": false, - "start_time": "2023-10-11T08:02:23.611082", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "# Basic Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "dc924c7f", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:02:23.620990Z", - "iopub.status.busy": "2023-10-11T08:02:23.620432Z", - "iopub.status.idle": "2023-10-11T08:02:24.379549Z", - "shell.execute_reply": "2023-10-11T08:02:24.378580Z" - }, - "papermill": { - "duration": 0.765369, - "end_time": "2023-10-11T08:02:24.381741", - "exception": false, - "start_time": "2023-10-11T08:02:23.616372", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# First lets setup the various directories, and init the model\n", - "!mkdir -p ../../../../model/\n", - "!mkdir -p ../../../../datapath/\n", - "!mkdir -p ../../../../checkpoint/" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "2bbc32ac", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:02:24.389788Z", - "iopub.status.busy": "2023-10-11T08:02:24.389227Z", - "iopub.status.idle": "2023-10-11T08:02:24.398441Z", - "shell.execute_reply": "2023-10-11T08:02:24.397578Z" - }, - "papermill": { - "duration": 0.015548, - "end_time": "2023-10-11T08:02:24.400362", - "exception": false, - "start_time": "2023-10-11T08:02:24.384814", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "DEEPSPEED_STRAT: deepspeed_stage_2_offload\n", - "ENABLE_WANDB: True\n", - "GPU_DEVICES: auto\n", - "NOTEBOOK_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/multi-size-train\n", - "INFERENCE_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", - "TRAINER_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", - "PROJECT_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer\n" - ] - } - ], - "source": [ - "DEEPSPEED_STRAT=\"deepspeed_stage_2_offload\"\n", - "GPU_DEVICES=\"auto\"\n", - "ENABLE_WANDB=True\n", - "\n", - "EMBED_SCALE=0.01\n", - "EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(\".\", \"_\")\n", - "\n", - "EMBED_SIZE=2048\n", - "\n", - "WANDB_PREFIX=f\"[Multi-size] v5-L6+6-D{EMBED_SIZE}-E{EMBED_SCALE}\"\n", - "FILENAME_PREFIX=f\"v5-L6+6-D{EMBED_SIZE}-E{EMBED_SCALE_LABEL}\"\n", - "\n", - "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n", - "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", - "print(\"GPU_DEVICES:\", GPU_DEVICES)\n", - "\n", - "if ENABLE_WANDB:\n", - " WANDB_MODE=\"online\"\n", - "else:\n", - " WANDB_MODE=\"disabled\"\n", - "\n", - "# Computing the notebook, and various paths\n", - "import os\n", - "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", - "PROJECT_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../../../../\"))\n", - "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", - "INFERENCE_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", - "\n", - "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", - "print(\"INFERENCE_DIR:\", INFERENCE_DIR)\n", - "print(\"TRAINER_DIR:\", TRAINER_DIR)\n", - "print(\"PROJECT_DIR:\", PROJECT_DIR)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "ffa69634", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:02:24.408311Z", - "iopub.status.busy": "2023-10-11T08:02:24.407798Z", - "iopub.status.idle": "2023-10-11T08:03:19.634663Z", - "shell.execute_reply": "2023-10-11T08:03:19.633765Z" - }, - "papermill": { - "duration": 55.233419, - "end_time": "2023-10-11T08:03:19.636895", - "exception": false, - "start_time": "2023-10-11T08:02:24.403476", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2023-10-11 08:02:24-- https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/0600b94a58219f658326b4792ef5cd020e9d1a43/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2a.pth\r\n", - "Resolving huggingface.co (huggingface.co)... 18.154.227.87, 18.154.227.7, 18.154.227.69, ...\r\n", - "Connecting to huggingface.co (huggingface.co)|18.154.227.87|:443... connected.\r\n", - "HTTP request sent, awaiting response... 302 Found\r\n", - "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/2f52085cee9c3db4bb079dc44edf50b0a19c170bd92128e918e6203efef83cea?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L6-D2048-E0_01-split-2a.pth%3B+filename%3D%22v5-L6-D2048-E0_01-split-2a.pth%22%3B&Expires=1697270544&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NzI3MDU0NH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzJmNTIwODVjZWU5YzNkYjRiYjA3OWRjNDRlZGY1MGIwYTE5YzE3MGJkOTIxMjhlOTE4ZTYyMDNlZmVmODNjZWE%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=AW451jyDioqxesXvDVp%7EgfYV3uhgFTDwTn3SlZa-gk-yCDb7c-QR44rTm9sWCGSJjaa%7EvJvj9zLGUK7fvbr%7E%7EGQJgL2L%7Es9vkVPg8qs1k%7EtCh-MX%7E45bxo4CapTIo8fx4xLJ738Tks8uzpx3Sy9hWbfuGQFCUwBHzJXG5uGNRzPv87Zdfy4gIIAt0NytaC3bFmKZl4DbXLF4%7EtVWXED7H3NAlBvGETdhjzK5Qr0FLZB2vqC1LQpPTexdTH-ETkPEIQpXRBV-JctzaKBfI1Da-tGpt4JdPlhyPIu1kaNtX13yTibuBrT-mDOy6OVJZ9Zsj%7EHdVUtDrdp-I01dhylHpQ__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n", - "--2023-10-11 08:02:24-- https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/2f52085cee9c3db4bb079dc44edf50b0a19c170bd92128e918e6203efef83cea?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L6-D2048-E0_01-split-2a.pth%3B+filename%3D%22v5-L6-D2048-E0_01-split-2a.pth%22%3B&Expires=1697270544&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NzI3MDU0NH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzJmNTIwODVjZWU5YzNkYjRiYjA3OWRjNDRlZGY1MGIwYTE5YzE3MGJkOTIxMjhlOTE4ZTYyMDNlZmVmODNjZWE%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=AW451jyDioqxesXvDVp%7EgfYV3uhgFTDwTn3SlZa-gk-yCDb7c-QR44rTm9sWCGSJjaa%7EvJvj9zLGUK7fvbr%7E%7EGQJgL2L%7Es9vkVPg8qs1k%7EtCh-MX%7E45bxo4CapTIo8fx4xLJ738Tks8uzpx3Sy9hWbfuGQFCUwBHzJXG5uGNRzPv87Zdfy4gIIAt0NytaC3bFmKZl4DbXLF4%7EtVWXED7H3NAlBvGETdhjzK5Qr0FLZB2vqC1LQpPTexdTH-ETkPEIQpXRBV-JctzaKBfI1Da-tGpt4JdPlhyPIu1kaNtX13yTibuBrT-mDOy6OVJZ9Zsj%7EHdVUtDrdp-I01dhylHpQ__&Key-Pair-Id=KVTP0A1DKRTAX\r\n", - "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... 3.162.112.69, 3.162.112.2, 3.162.112.100, ...\r\n", - "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|3.162.112.69|:443... connected.\r\n", - "HTTP request sent, awaiting response... " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "200 OK\r\n", - "Length: 1066536657 (1017M) [binary/octet-stream]\r\n", - "Saving to: ‘v5-L6-D2048-E0_01-split-2a.pth’\r\n", - "\r\n", - "\r", - " v5-L6-D20 0%[ ] 0 --.-KB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D204 1%[ ] 15.26M 42.9MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048 3%[ ] 30.52M 47.7MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048- 4%[ ] 45.26M 51.5MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E 5%[> ] 59.20M 52.1MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0 6%[> ] 65.20M 48.8MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_ 7%[> ] 76.29M 44.4MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_0 8%[> ] 91.03M 47.2MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_01 9%[> ] 91.55M 43.0MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_01- 10%[=> ] 106.81M 43.2MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "v5-L6-D2048-E0_01-s 11%[=> ] 120.25M 43.9MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "5-L6-D2048-E0_01-sp 12%[=> ] 122.07M 41.4MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-L6-D2048-E0_01-spl 13%[=> ] 136.81M 42.5MB/s eta 21s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "L6-D2048-E0_01-spli 14%[=> ] 152.07M 42.8MB/s eta 21s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "6-D2048-E0_01-split 15%[==> ] 152.72M 40.5MB/s eta 21s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-D2048-E0_01-split- 16%[==> ] 167.85M 41.5MB/s eta 21s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "D2048-E0_01-split-2 18%[==> ] 183.10M 43.0MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "2048-E0_01-split-2a 19%[==> ] 198.36M 43.4MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "048-E0_01-split-2a. 20%[===> ] 213.11M 44.1MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "48-E0_01-split-2a.p 22%[===> ] 228.36M 43.3MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "8-E0_01-split-2a.pt 22%[===> ] 228.87M 41.1MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-E0_01-split-2a.pth 24%[===> ] 244.13M 41.0MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "E0_01-split-2a.pth 25%[====> ] 259.40M 42.4MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "0_01-split-2a.pth 26%[====> ] 272.83M 40.4MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "_01-split-2a.pth 28%[====> ] 289.40M 41.4MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "01-split-2a.pth 28%[====> ] 289.92M 37.9MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1-split-2a.pth 29%[====> ] 304.66M 36.1MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-split-2a.pth 30%[=====> ] 305.18M 33.4MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "split-2a.pth 31%[=====> ] 318.60M 33.2MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "plit-2a.pth 31%[=====> ] 320.29M 33.3MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "lit-2a.pth 31%[=====> ] 320.57M 30.7MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "it-2a.pth 32%[=====> ] 335.18M 30.1MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "t-2a.pth 33%[=====> ] 345.53M 31.2MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-2a.pth 34%[=====> ] 350.82M 29.7MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "2a.pth 35%[======> ] 360.98M 31.3MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "a.pth 36%[======> ] 366.20M 29.6MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - ".pth 37%[======> ] 380.96M 30.8MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "pth 38%[======> ] 392.79M 32.0MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "th 39%[======> ] 396.73M 29.1MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "h 40%[=======> ] 411.99M 29.1MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " 41%[=======> ] 426.73M 28.7MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v 42%[=======> ] 427.25M 29.1MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5 42%[=======> ] 435.25M 27.9MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5- 43%[=======> ] 438.04M 28.2MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L 43%[=======> ] 442.05M 29.7MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6 43%[=======> ] 446.00M 31.1MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6- 44%[=======> ] 457.24M 33.9MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D 45%[========> ] 457.89M 31.6MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2 46%[========> ] 473.02M 34.8MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D20 48%[========> ] 488.28M 34.1MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D204 49%[========> ] 503.03M 34.6MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048 50%[=========> ] 518.29M 37.3MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048- 51%[=========> ] 525.10M 35.8MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E 52%[=========> ] 534.05M 34.4MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0 53%[=========> ] 548.80M 34.4MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_ 55%[==========> ] 562.75M 33.8MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_0 56%[==========> ] 579.31M 36.0MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_01 57%[==========> ] 581.49M 36.7MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_01- 58%[==========> ] 592.93M 37.4MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "v5-L6-D2048-E0_01-s 58%[==========> ] 595.09M 37.1MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "5-L6-D2048-E0_01-sp 60%[===========> ] 610.35M 38.5MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-L6-D2048-E0_01-spl 61%[===========> ] 625.61M 38.7MB/s eta 11s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "L6-D2048-E0_01-spli 62%[===========> ] 640.36M 39.9MB/s eta 11s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "6-D2048-E0_01-split 64%[===========> ] 653.30M 39.5MB/s eta 11s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-D2048-E0_01-split- 64%[===========> ] 656.13M 38.5MB/s eta 11s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "D2048-E0_01-split-2 66%[============> ] 671.38M 38.9MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "2048-E0_01-split-2a 67%[============> ] 685.57M 39.7MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "048-E0_01-split-2a. 67%[============> ] 686.64M 37.5MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "48-E0_01-split-2a.p 68%[============> ] 701.39M 37.9MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "8-E0_01-split-2a.pt 69%[============> ] 708.59M 38.8MB/s eta 8s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-E0_01-split-2a.pth 70%[=============> ] 715.34M 38.2MB/s eta 8s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "E0_01-split-2a.pth 71%[=============> ] 731.91M 40.7MB/s eta 8s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "0_01-split-2a.pth 73%[=============> ] 747.17M 38.0MB/s eta 8s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "_01-split-2a.pth 73%[=============> ] 747.75M 38.0MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "01-split-2a.pth 74%[=============> ] 762.42M 40.2MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1-split-2a.pth 75%[==============> ] 762.94M 37.2MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-split-2a.pth 76%[==============> ] 776.37M 36.7MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "split-2a.pth 76%[==============> ] 778.20M 34.9MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "plit-2a.pth 77%[==============> ] 791.63M 38.1MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "lit-2a.pth 78%[==============> ] 793.46M 36.0MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "it-2a.pth 79%[==============> ] 808.20M 38.6MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "t-2a.pth 80%[===============> ] 816.07M 36.7MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-2a.pth 81%[===============> ] 823.97M 34.7MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "2a.pth 82%[===============> ] 837.41M 36.4MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "a.pth 83%[===============> ] 853.98M 38.3MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - ".pth 85%[================> ] 867.67M 38.4MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "pth 85%[================> ] 873.17M 39.1MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "th 87%[================> ] 885.01M 36.1MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "h 88%[================> ] 899.75M 37.6MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " 88%[================> ] 900.40M 34.6MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v 90%[=================> ] 915.53M 35.4MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5 91%[=================> ] 930.78M 37.6MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5- 92%[=================> ] 945.53M 40.9MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L 93%[=================> ] 946.04M 37.6MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6 94%[=================> ] 959.48M 38.0MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6- 94%[=================> ] 961.30M 33.1MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D 95%[==================> ] 976.05M 34.9MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2 97%[==================> ] 991.31M 34.8MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D20 97%[==================> ] 992.94M 35.0MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D204 98%[==================> ] 1005M 34.7MB/s eta 0s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048 99%[==================> ] 1016M 33.7MB/s eta 0s \r", - "v5-L6-D2048-E0_01-s 100%[===================>] 1017M 33.9MB/s in 28s \r\n", - "\r\n", - "2023-10-11 08:02:52 (36.4 MB/s) - ‘v5-L6-D2048-E0_01-split-2a.pth’ saved [1066536657/1066536657]\r\n", - "\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2023-10-11 08:02:53-- https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/0600b94a58219f658326b4792ef5cd020e9d1a43/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2b.pth\r\n", - "Resolving huggingface.co (huggingface.co)... 18.154.227.67, 18.154.227.69, 18.154.227.7, ...\r\n", - "Connecting to huggingface.co (huggingface.co)|18.154.227.67|:443... connected.\r\n", - "HTTP request sent, awaiting response... 302 Found\r\n", - "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/6b64a1018631b9ddd15a746002bab3eafe956dced78a91af7abcdadaae4a7b25?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L6-D2048-E0_01-split-2b.pth%3B+filename%3D%22v5-L6-D2048-E0_01-split-2b.pth%22%3B&Expires=1697270573&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NzI3MDU3M319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzZiNjRhMTAxODYzMWI5ZGRkMTVhNzQ2MDAyYmFiM2VhZmU5NTZkY2VkNzhhOTFhZjdhYmNkYWRhYWU0YTdiMjU%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=WkKE1KjbKeVQp4dWdBuAAbOfx2JJs%7EDJaKbx8gRQSGABLfGDhkq2L8Q9KZ1fg1v%7E74c0Mkrbvop33pAwQDh782jzEiogbDb8HXSO7AtIYQqvI6K-fmb%7EpxQPFrmypJwWhQj9ePRZX2KSL6LcqN1X0GAheI-PQENpVH3svxhhib2-fYDmuvnpGX7pc6n36GES6lvwOuCQOxfIhlFnIiuNEU00NaBdDiaXb-uteXhSkKO-1EFCM0fBtwT5hVkdHZQG2m6iMcI2KaN0AHV%7EvF838f4DM%7ERbjVkRgwphRaYZxmJxUKZxGTV7rRJjIQA%7EOlnPllE1dSdwJ7y0ULOIKQHYUQ__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n", - "--2023-10-11 08:02:53-- https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/6b64a1018631b9ddd15a746002bab3eafe956dced78a91af7abcdadaae4a7b25?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L6-D2048-E0_01-split-2b.pth%3B+filename%3D%22v5-L6-D2048-E0_01-split-2b.pth%22%3B&Expires=1697270573&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NzI3MDU3M319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzZiNjRhMTAxODYzMWI5ZGRkMTVhNzQ2MDAyYmFiM2VhZmU5NTZkY2VkNzhhOTFhZjdhYmNkYWRhYWU0YTdiMjU%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=WkKE1KjbKeVQp4dWdBuAAbOfx2JJs%7EDJaKbx8gRQSGABLfGDhkq2L8Q9KZ1fg1v%7E74c0Mkrbvop33pAwQDh782jzEiogbDb8HXSO7AtIYQqvI6K-fmb%7EpxQPFrmypJwWhQj9ePRZX2KSL6LcqN1X0GAheI-PQENpVH3svxhhib2-fYDmuvnpGX7pc6n36GES6lvwOuCQOxfIhlFnIiuNEU00NaBdDiaXb-uteXhSkKO-1EFCM0fBtwT5hVkdHZQG2m6iMcI2KaN0AHV%7EvF838f4DM%7ERbjVkRgwphRaYZxmJxUKZxGTV7rRJjIQA%7EOlnPllE1dSdwJ7y0ULOIKQHYUQ__&Key-Pair-Id=KVTP0A1DKRTAX\r\n", - "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... 3.162.112.95, 3.162.112.100, 3.162.112.2, ...\r\n", - "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|3.162.112.95|:443... connected.\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "HTTP request sent, awaiting response... " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "200 OK\r\n", - "Length: 1066536657 (1017M) [binary/octet-stream]\r\n", - "Saving to: ‘v5-L6-D2048-E0_01-split-2b.pth’\r\n", - "\r\n", - "\r", - " v5-L6-D20 0%[ ] 0 --.-KB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D204 1%[ ] 14.74M 67.8MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048 2%[ ] 28.69M 63.1MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048- 3%[ ] 30.52M 42.3MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E 4%[ ] 45.26M 45.2MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0 4%[ ] 45.78M 37.9MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_ 6%[> ] 61.03M 41.2MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_0 7%[> ] 75.78M 45.0MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_01 8%[> ] 85.94M 45.6MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_01- 9%[> ] 91.55M 40.8MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "v5-L6-D2048-E0_01-s 10%[=> ] 106.81M 40.5MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "5-L6-D2048-E0_01-sp 12%[=> ] 122.07M 40.2MB/s eta 22s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-L6-D2048-E0_01-spl 13%[=> ] 137.33M 41.7MB/s eta 22s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "L6-D2048-E0_01-spli 14%[=> ] 152.07M 42.9MB/s eta 22s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "6-D2048-E0_01-split 16%[==> ] 167.33M 43.6MB/s eta 22s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-D2048-E0_01-split- 17%[==> ] 181.32M 44.9MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "D2048-E0_01-split-2 18%[==> ] 183.10M 41.7MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "2048-E0_01-split-2b 19%[==> ] 196.53M 41.8MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "048-E0_01-split-2b. 19%[==> ] 198.36M 39.0MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "48-E0_01-split-2b.p 20%[===> ] 213.11M 39.0MB/s eta 20s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "8-E0_01-split-2b.pt 21%[===> ] 220.29M 40.8MB/s eta 20s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-E0_01-split-2b.pth 22%[===> ] 228.36M 39.9MB/s eta 20s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "E0_01-split-2b.pth 24%[===> ] 244.13M 40.3MB/s eta 20s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "0_01-split-2b.pth 25%[====> ] 259.40M 40.4MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "_01-split-2b.pth 26%[====> ] 274.14M 42.1MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "01-split-2b.pth 27%[====> ] 274.66M 38.5MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1-split-2b.pth 28%[====> ] 289.92M 41.6MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-split-2b.pth 30%[=====> ] 305.18M 41.6MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "split-2b.pth 31%[=====> ] 320.43M 40.9MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "plit-2b.pth 32%[=====> ] 335.18M 41.2MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "lit-2b.pth 33%[=====> ] 335.69M 38.5MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "it-2b.pth 34%[=====> ] 350.95M 38.6MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "t-2b.pth 35%[======> ] 365.70M 40.9MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-2b.pth 36%[======> ] 366.20M 38.0MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "2b.pth 37%[======> ] 381.47M 38.8MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "b.pth 37%[======> ] 385.65M 39.1MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - ".pth 39%[======> ] 396.73M 36.3MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "pth 39%[======> ] 406.75M 37.5MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "th 40%[=======> ] 411.99M 33.0MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "h 42%[=======> ] 427.25M 33.6MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " 43%[=======> ] 441.98M 32.7MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v 43%[=======> ] 442.51M 32.5MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5 44%[=======> ] 457.25M 32.2MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5- 45%[========> ] 457.76M 32.2MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L 46%[========> ] 472.50M 31.5MB/s eta 14s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6 46%[========> ] 473.02M 31.5MB/s eta 14s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6- 48%[========> ] 488.28M 30.9MB/s eta 14s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D 49%[========> ] 503.54M 33.6MB/s eta 14s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2 50%[=========> ] 518.29M 34.2MB/s eta 14s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D20 51%[=========> ] 518.80M 34.8MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D204 52%[=========> ] 534.05M 34.2MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048 54%[=========> ] 549.31M 37.5MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048- 55%[==========> ] 564.06M 37.7MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E 55%[==========> ] 565.78M 37.7MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0 57%[==========> ] 579.83M 37.8MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_ 58%[==========> ] 595.09M 39.7MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_0 60%[===========> ] 610.35M 40.9MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_01 61%[===========> ] 625.47M 44.1MB/s eta 10s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_01- 61%[===========> ] 629.82M 42.6MB/s eta 10s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "v5-L6-D2048-E0_01-s 63%[===========> ] 640.87M 42.6MB/s eta 10s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "5-L6-D2048-E0_01-sp 64%[===========> ] 656.13M 45.6MB/s eta 10s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-L6-D2048-E0_01-spl 66%[============> ] 671.38M 45.8MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "L6-D2048-E0_01-spli 67%[============> ] 686.64M 47.1MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "6-D2048-E0_01-split 69%[============> ] 701.90M 47.0MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-D2048-E0_01-split- 70%[=============> ] 717.16M 46.9MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "D2048-E0_01-split-2 71%[=============> ] 730.60M 47.8MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "2048-E0_01-split-2b 73%[=============> ] 747.17M 45.9MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "048-E0_01-split-2b. 74%[=============> ] 755.98M 45.7MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "48-E0_01-split-2b.p 75%[==============> ] 762.94M 43.2MB/s eta 6s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "8-E0_01-split-2b.pt 76%[==============> ] 777.68M 45.4MB/s eta 6s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-E0_01-split-2b.pth 76%[==============> ] 778.32M 42.7MB/s eta 6s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "E0_01-split-2b.pth 78%[==============> ] 793.46M 42.0MB/s eta 6s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "0_01-split-2b.pth 79%[==============> ] 808.20M 41.7MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "_01-split-2b.pth 80%[===============> ] 814.09M 42.3MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "01-split-2b.pth 80%[===============> ] 823.46M 41.0MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1-split-2b.pth 81%[===============> ] 823.97M 40.5MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-split-2b.pth 82%[===============> ] 838.71M 38.6MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "split-2b.pth 83%[===============> ] 853.98M 41.4MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "plit-2b.pth 84%[===============> ] 854.61M 38.0MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "lit-2b.pth 85%[================> ] 869.24M 35.7MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "it-2b.pth 85%[================> ] 869.75M 35.3MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "t-2b.pth 86%[================> ] 875.74M 34.3MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-2b.pth 87%[================> ] 885.01M 32.5MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "2b.pth 88%[================> ] 900.27M 33.8MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "b.pth 89%[================> ] 913.70M 34.5MB/s eta 3s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - ".pth 90%[=================> ] 924.21M 34.8MB/s eta 3s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "pth 91%[=================> ] 930.27M 35.3MB/s eta 3s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "th 93%[=================> ] 946.04M 34.9MB/s eta 3s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "h 94%[=================> ] 961.30M 37.2MB/s eta 1s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " 95%[==================> ] 970.14M 35.7MB/s eta 1s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v 96%[==================> ] 976.55M 34.9MB/s eta 1s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5 97%[==================> ] 991.82M 37.0MB/s eta 1s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5- 98%[==================> ] 998.13M 35.6MB/s eta 1s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L 98%[==================> ] 1007M 37.2MB/s eta 0s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6 99%[==================> ] 1016M 34.9MB/s eta 0s \r", - "v5-L6-D2048-E0_01-s 100%[===================>] 1017M 35.1MB/s in 26s \r\n", - "\r\n", - "2023-10-11 08:03:19 (38.9 MB/s) - ‘v5-L6-D2048-E0_01-split-2b.pth’ saved [1066536657/1066536657]\r\n", - "\r\n" - ] - } - ], - "source": [ - "# Get the init split model, and finetune from there\n", - "!cd \"{PROJECT_DIR}/model/\" && wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/0600b94a58219f658326b4792ef5cd020e9d1a43/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2a.pth\"\n", - "!cd \"{PROJECT_DIR}/model/\" && wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/0600b94a58219f658326b4792ef5cd020e9d1a43/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2b.pth\"" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "2a3cd2d1", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:03:19.666619Z", - "iopub.status.busy": "2023-10-11T08:03:19.665958Z", - "iopub.status.idle": "2023-10-11T08:03:29.305787Z", - "shell.execute_reply": "2023-10-11T08:03:29.304873Z" - }, - "papermill": { - "duration": 9.658186, - "end_time": "2023-10-11T08:03:29.308744", - "exception": false, - "start_time": "2023-10-11T08:03:19.650558", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "Saving the dataset (0/2 shards): 0%| | 0/27200 [00:00\r\n", - " cli_main()\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 253, in cli_main\r\n", - " LightningCLI(\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 350, in __init__\r\n", - " self.instantiate_classes()\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 499, in instantiate_classes\r\n", - " self.config_init = self.parser.instantiate_classes(self.config)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n", - " cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1130, in instantiate_classes\r\n", - " cfg[subcommand] = subparser.instantiate_classes(cfg[subcommand], instantiate_groups=instantiate_groups)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n", - " cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1124, in instantiate_classes\r\n", - " component.instantiate_class(component, cfg)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_signatures.py\", line 561, in group_instantiate_class\r\n", - " parent[key] = group.group_class(**value)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 566, in __init__\r\n", - " raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n", - "ValueError: load_model file '../model/v5-L6+6-D2048-E0_01-split-2a.pth' does not exist\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33m[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion A3 (train-ctx=4k, deepspeed_stage_2_offload)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/runs/5696uouo\u001b[0m\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjk0OTk4MDcy/version_details/v16\u001b[0m\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20231011_080337-5696uouo/logs\u001b[0m\r\n" - ] - } - ], - "source": [ - "# Start the foundation model training\n", - "!cd \"{TRAINER_DIR}\" && \\\n", - " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", - " python3 lightning_trainer.py fit \\\n", - " -c \"{NOTEBOOK_DIR}/enwiki-4k-part3.yaml\" \\\n", - " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - layer-expansion A3 (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n", - " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", - " --trainer.devices=\"{GPU_DEVICES}\" \\\n", - " --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-layer-expansion-a3/\" \\\n", - " --model.load_model=\"../model/{FILENAME_PREFIX}-split-2a.pth\" \\\n", - " --model.ctx_len=4096 \\\n", - " --model.bptt_learning_range=1" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "53867c42", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:03:46.969471Z", - "iopub.status.busy": "2023-10-11T08:03:46.969019Z", - "iopub.status.idle": "2023-10-11T08:03:50.682437Z", - "shell.execute_reply": "2023-10-11T08:03:50.680986Z" - }, - "papermill": { - "duration": 3.732808, - "end_time": "2023-10-11T08:03:50.685581", - "exception": false, - "start_time": "2023-10-11T08:03:46.952773", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2023-10-11 08:03:49,278] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Traceback (most recent call last):\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in \r\n", - " convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n", - " state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n", - " raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n", - "ValueError: Unable to find 'latest' file at ../checkpoint/v5-L6+6-D2048-E0_01-layer-expansion-a3/last.ckpt/latest\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ls: cannot access '../model/v5-L6+6-D2048-E0_01-layer-expansion-a3.pth': No such file or directory\r\n" - ] - } - ], - "source": [ - "# Lets export the model from the checkpoint\n", - "!cd \"{TRAINER_DIR}\" && \\\n", - " python3 export_checkpoint.py \"../checkpoint/{FILENAME_PREFIX}-layer-expansion-a3/last.ckpt\" \"../model/{FILENAME_PREFIX}-layer-expansion-a3.pth\" \"bf16\"\n", - "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-layer-expansion-a3.pth\"" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "5688e577", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:03:50.806267Z", - "iopub.status.busy": "2023-10-11T08:03:50.804997Z", - "iopub.status.idle": "2023-10-11T08:03:56.788036Z", - "shell.execute_reply": "2023-10-11T08:03:56.786568Z" - }, - "papermill": { - "duration": 6.08675, - "end_time": "2023-10-11T08:03:56.790510", - "exception": false, - "start_time": "2023-10-11T08:03:50.703760", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2023-10-11 08:03:54,934] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Traceback (most recent call last):\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/dragon_test.py\", line 52, in \r\n", - " model = SimpleRWKV(MODEL_PATH, device=DEVICE)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 1420, in __init__\r\n", - " self.model = RWKV(**model_config)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 566, in __init__\r\n", - " raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n", - "ValueError: load_model file '../model/v5-L6+6-D2048-E0_01-layer-expansion-a3.pth' does not exist\r\n" - ] - } - ], - "source": [ - "# # Lets do a quick dragon prompt validation\n", - "!cd \"{INFERENCE_DIR}\" && \\\n", - " python3 dragon_test.py \"../model/{FILENAME_PREFIX}-layer-expansion-a3.pth\" \"cuda fp32\"" - ] - }, - { - "cell_type": "markdown", - "id": "b4927e87", - "metadata": { - "papermill": { - "duration": 0.015295, - "end_time": "2023-10-11T08:03:56.820640", - "exception": false, - "start_time": "2023-10-11T08:03:56.805345", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Enwiki Stage 3 : Split-Baseline-B training" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "6bdd285a", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:03:56.853495Z", - "iopub.status.busy": "2023-10-11T08:03:56.852946Z", - "iopub.status.idle": "2023-10-11T08:04:11.500794Z", - "shell.execute_reply": "2023-10-11T08:04:11.499336Z" - }, - "papermill": { - "duration": 14.668001, - "end_time": "2023-10-11T08:04:11.503644", - "exception": false, - "start_time": "2023-10-11T08:03:56.835643", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2023-10-11 08:04:01,096] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/multi-size-train/enwiki-4k-part3.yaml', '--trainer.logger.init_args.name=[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion B3 (train-ctx=4k, deepspeed_stage_2_offload)', '--trainer.strategy=deepspeed_stage_2_offload', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-L6+6-D2048-E0_01-layer-expansion-b3/', '--model.load_model=../model/v5-L6+6-D2048-E0_01-split-2b.pth', '--model.ctx_len=4096', '--model.bptt_learning_range=1'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/multi-size-train/enwiki-4k-part3.yaml', '--trainer.logger.init_args.name=[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion B3 (train-ctx=4k, deepspeed_stage_2_offload)', '--trainer.strategy=deepspeed_stage_2_offload', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-L6+6-D2048-E0_01-layer-expansion-b3/', '--model.load_model=../model/v5-L6+6-D2048-E0_01-split-2b.pth', '--model.ctx_len=4096', '--model.bptt_learning_range=1'].\r\n", - " rank_zero_warn(\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 1732922148\r\n", - " rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n", - "Global seed set to 1732922148\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.12\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20231011_080403-88lcuk7j\u001b[0m\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion B3 (train-ctx=4k, deepspeed_stage_2_offload)\u001b[0m\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments\u001b[0m\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/runs/88lcuk7j\u001b[0m\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Traceback (most recent call last):\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 278, in \r\n", - " cli_main()\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 253, in cli_main\r\n", - " LightningCLI(\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 350, in __init__\r\n", - " self.instantiate_classes()\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 499, in instantiate_classes\r\n", - " self.config_init = self.parser.instantiate_classes(self.config)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n", - " cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1130, in instantiate_classes\r\n", - " cfg[subcommand] = subparser.instantiate_classes(cfg[subcommand], instantiate_groups=instantiate_groups)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n", - " cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1124, in instantiate_classes\r\n", - " component.instantiate_class(component, cfg)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_signatures.py\", line 561, in group_instantiate_class\r\n", - " parent[key] = group.group_class(**value)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 566, in __init__\r\n", - " raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n", - "ValueError: load_model file '../model/v5-L6+6-D2048-E0_01-split-2b.pth' does not exist\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33m[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion B3 (train-ctx=4k, deepspeed_stage_2_offload)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/runs/88lcuk7j\u001b[0m\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjk0OTk4MDcy/version_details/v16\u001b[0m\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20231011_080403-88lcuk7j/logs\u001b[0m\r\n" - ] - } - ], - "source": [ - "# Start the foundation model training\n", - "!cd \"{TRAINER_DIR}\" && \\\n", - " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", - " python3 lightning_trainer.py fit \\\n", - " -c \"{NOTEBOOK_DIR}/enwiki-4k-part3.yaml\" \\\n", - " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - layer-expansion B3 (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n", - " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", - " --trainer.devices=\"{GPU_DEVICES}\" \\\n", - " --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-layer-expansion-b3/\" \\\n", - " --model.load_model=\"../model/{FILENAME_PREFIX}-split-2b.pth\" \\\n", - " --model.ctx_len=4096 \\\n", - " --model.bptt_learning_range=1" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "ae4623a1", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:04:11.546046Z", - "iopub.status.busy": "2023-10-11T08:04:11.544870Z", - "iopub.status.idle": "2023-10-11T08:04:15.274349Z", - "shell.execute_reply": "2023-10-11T08:04:15.272957Z" - }, - "papermill": { - "duration": 3.754115, - "end_time": "2023-10-11T08:04:15.277163", - "exception": false, - "start_time": "2023-10-11T08:04:11.523048", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2023-10-11 08:04:13,869] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Traceback (most recent call last):\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in \r\n", - " convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n", - " state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n", - " raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n", - "ValueError: Unable to find 'latest' file at ../checkpoint/v5-L6+6-D2048-E0_01-layer-expansion-b3/last.ckpt/latest\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ls: cannot access '../model/v5-L6+6-D2048-E0_01-layer-expansion-b3.pth': No such file or directory\r\n" - ] - } - ], - "source": [ - "# Lets export the model from the checkpoint\n", - "!cd \"{TRAINER_DIR}\" && \\\n", - " python3 export_checkpoint.py \"../checkpoint/{FILENAME_PREFIX}-layer-expansion-b3/last.ckpt\" \"../model/{FILENAME_PREFIX}-layer-expansion-b3.pth\" \"bf16\"\n", - "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-layer-expansion-b3.pth\"" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "8e1b1152", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:04:15.319747Z", - "iopub.status.busy": "2023-10-11T08:04:15.318636Z", - "iopub.status.idle": "2023-10-11T08:04:21.268526Z", - "shell.execute_reply": "2023-10-11T08:04:21.267073Z" - }, - "papermill": { - "duration": 5.974644, - "end_time": "2023-10-11T08:04:21.271495", - "exception": false, - "start_time": "2023-10-11T08:04:15.296851", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2023-10-11 08:04:19,430] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n", - "Traceback (most recent call last):\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/dragon_test.py\", line 52, in \r\n", - " model = SimpleRWKV(MODEL_PATH, device=DEVICE)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 1420, in __init__\r\n", - " self.model = RWKV(**model_config)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 566, in __init__\r\n", - " raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n", - "ValueError: load_model file '../model/v5-L6+6-D2048-E0_01-layer-expansion-b3.pth' does not exist\r\n" - ] - } - ], - "source": [ - "# # Lets do a quick dragon prompt validation\n", - "!cd \"{INFERENCE_DIR}\" && \\\n", - " python3 dragon_test.py \"../model/{FILENAME_PREFIX}-layer-expansion-b3.pth\" \"cuda fp32\"" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - }, - "papermill": { - "default_parameters": {}, - "duration": 119.315066, - "end_time": "2023-10-11T08:04:21.714050", - "environment_variables": {}, - "exception": null, - "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb", - "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb", - "parameters": {}, - "start_time": "2023-10-11T08:02:22.398984", - "version": "2.4.0" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} \ No newline at end of file diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-expansion.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-expansion.ipynb deleted file mode 100644 index 31a5b8eeab80f4fb0b5a736155d2fd141fa7fd54..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-expansion.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b0aa2c37ab25e53ed3e45a9e7b5b09d1ac2d2f627412df5c98cc1f113838d800 -size 15734950 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-overwrite-naive.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-overwrite-naive.ipynb deleted file mode 100644 index 9810fb95056168b6f333635a6ad59587d31b6e23..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-overwrite-naive.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d643e2a64a0f7323eb7b14b90ce5a0e5457818349c75e666dbf52b7319f5de72 -size 15733849 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-baseline-p2.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-baseline-p2.pth deleted file mode 100644 index b42c1d46426286791c4b684a05f90055dccae4d1..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-baseline-p2.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:849b57b4d493d40313ef04b30ffc22ec6f5cb99e05225615ee0cb00acb78a95d -size 1066537077 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-baseline-p3.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-baseline-p3.pth deleted file mode 100644 index e671afa6d1c25ea33703bbbdf389a33493910501..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-baseline-p3.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8413565273ef40f61db246dcbf793e045b39d1163e18885441be5a16d733f34c -size 1066537077 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-enwiki-4k-p1.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-enwiki-4k-p1.pth deleted file mode 100644 index b5857b83e411d72861863eda5c9c32a7132e1bfe..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-enwiki-4k-p1.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:235d88b0aa939596392f2b5734a426940535816aa13106498974a809051a4c75 -size 1066537217 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-layer-expansion-a3.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-layer-expansion-a3.pth deleted file mode 100644 index e145614e20e99af77e84454e6ef16a39a61c1d9f..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-layer-expansion-a3.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1afd8d92632792f498805ac222d159524badf4ecbcaaae597060b6bb87a53110 -size 1066538057 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-layer-expansion-b3.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-layer-expansion-b3.pth deleted file mode 100644 index 381a48603dc68a10750a4b7d78e79594e6bde52d..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-layer-expansion-b3.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9e61d8f8901d1eb50759f0242e2886678ed24b9931295a270b14120ba74cb5c3 -size 1066538057 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-neox-v5base-init.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-neox-v5base-init.pth deleted file mode 100644 index ded0f392eb463040cbb0e4a66326c5ae08bcbda6..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-neox-v5base-init.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c2d60ede71bc384ee4eff0a591b3fa57dd670c27e5e8ce5eadf25a7f0d7e226d -size 1066538337 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2a.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2a.pth deleted file mode 100644 index c498833cf2e305eacbd6ebd9485e9a5d6706eca2..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2a.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2f52085cee9c3db4bb079dc44edf50b0a19c170bd92128e918e6203efef83cea -size 1066536657 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2b.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2b.pth deleted file mode 100644 index b1bfb4e806da5dde645c9feb2acb0b0140ce43c6..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2b.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b64a1018631b9ddd15a746002bab3eafe956dced78a91af7abcdadaae4a7b25 -size 1066536657 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-merge-2m.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-merge-2m.pth deleted file mode 100644 index bfe873e0bdd09173577c50c9f6f3634155ade0ce..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-merge-2m.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f10f8f00c42b6408db81a3b26d53411c41edc7f23f5097ac095ad3096d6c5dc1 -size 1066537497 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-merge-p3.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-merge-p3.pth deleted file mode 100644 index f2aa96bd9b7f4e604e397947323f5156ee2fa129..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-merge-p3.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6f07a8414cd0cd1c3df705dff8a0f2142231171ee52a94d12c55dfe7c888fef7 -size 1066537497 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-baseline.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-baseline.ipynb deleted file mode 100644 index e04ec817954792ce45a871de0ebed229db957ffd..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-baseline.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0fccffc430231ad06fdb02a7e50ea57acfbeae3c42a97b018f62f937d30736e4 -size 16519239 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb deleted file mode 100644 index 955412d6f333912148d0dc1023c32ce58509ccd2..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7260b3fe80de461d6dc923b21af87361f71e26a4a7191d51dd9665403728ddfa -size 15732960 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-baseline.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-baseline.ipynb deleted file mode 100644 index 5c78f23bf3d5e33fcb6836c803015836a2da0149..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-baseline.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f95adf89d498a4dd58af22ba192b2fd4d08ceec250784c7e9f6f9b8de0fed2bc -size 15855123 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-train.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-train.ipynb deleted file mode 100644 index 50ba5f3c8e80bcfb1a8005406d9e4f78979d8dac..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-train.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c308e5ae9f8fde5fd24cafccf60917dca9c97fc2e0a5fbcfa01027d6d50e927d -size 16623766 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb index 08a47ab9cc56551d38f353b7abad52bfde0da722..28d90ab93cca447d2f6201dc3c22328a198e9d59 100644 --- a/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb +++ b/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb @@ -1,3 +1,140296 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0db5673fbf59261a65cafb957510a87538f738b6cebffd10ed532db38dfdcb01 -size 53132732 +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "fb8bab66", + "metadata": { + "papermill": { + "duration": 0.004984, + "end_time": "2023-09-06T18:15:07.813560", + "exception": false, + "start_time": "2023-09-06T18:15:07.808576", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# RWKV v5\n", + "\n", + "Simple memory training for a small model\n", + "\n", + "**Note:** This project assumes you have the rwkv-infctx conda env setup" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "644a18dd", + "metadata": { + "papermill": { + "duration": 0.002407, + "end_time": "2023-09-06T18:15:07.820470", + "exception": false, + "start_time": "2023-09-06T18:15:07.818063", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Basic Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "88954417", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T18:15:07.826313Z", + "iopub.status.busy": "2023-09-06T18:15:07.826023Z", + "iopub.status.idle": "2023-09-06T18:15:08.710191Z", + "shell.execute_reply": "2023-09-06T18:15:08.709357Z" + }, + "papermill": { + "duration": 0.889532, + "end_time": "2023-09-06T18:15:08.712219", + "exception": false, + "start_time": "2023-09-06T18:15:07.822687", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CITATION.cff RWKV-v4wavenet\t RWKV-v5headsize2x checkpoint\tnotebook\r\n", + "LICENSE RWKV-v5\t\t RWKV-v5headsize32 datapath\toutput\r\n", + "README.md RWKV-v5-beta2\t RWKV-v5rstack\t docker\r\n", + "RWKV-v4neo RWKV-v5altwavenet RWKV-v5wavenet model\r\n" + ] + } + ], + "source": [ + "# First lets setup the various directories, and init the model\n", + "!ls ../../../../../\n", + "!mkdir -p ../../../../../model/\n", + "!mkdir -p ../../../../../datapath/\n", + "!mkdir -p ../../../../../checkpoint/" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "54728414", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T18:15:08.723120Z", + "iopub.status.busy": "2023-09-06T18:15:08.722681Z", + "iopub.status.idle": "2023-09-06T18:15:10.844392Z", + "shell.execute_reply": "2023-09-06T18:15:10.843613Z" + }, + "papermill": { + "duration": 2.129186, + "end_time": "2023-09-06T18:15:10.846223", + "exception": false, + "start_time": "2023-09-06T18:15:08.717037", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\r\n", + "\u001b[0m" + ] + } + ], + "source": [ + "# Additional dependencies for eval stuff\n", + "!pip install -q aiocsv aiofiles" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "4e5c05c9", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T18:15:10.856132Z", + "iopub.status.busy": "2023-09-06T18:15:10.855690Z", + "iopub.status.idle": "2023-09-06T18:15:10.864690Z", + "shell.execute_reply": "2023-09-06T18:15:10.864101Z" + }, + "papermill": { + "duration": 0.015147, + "end_time": "2023-09-06T18:15:10.866187", + "exception": false, + "start_time": "2023-09-06T18:15:10.851040", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DEEPSPEED_STRAT:" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " deepspeed_stage_1\n", + "ENABLE_WANDB: True\n", + "GPU_DEVICES: auto\n", + "DIR_NAME: L12-D2048-E1e-1-ctx4k\n", + "NOTEBOOK_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k\n", + "INFERENCE_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", + "TRAINER_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", + "PROJECT_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer\n" + ] + } + ], + "source": [ + "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n", + "GPU_DEVICES=\"auto\"\n", + "ENABLE_WANDB=True\n", + "\n", + "# Layer count and embed dim to start with\n", + "LAYER_COUNT=12\n", + "EMBED_DIM=2048\n", + "\n", + "EMBED_SCALE=0.1\n", + "EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(\".\", \"_\")\n", + "\n", + "WANDB_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE}\"\n", + "FILENAME_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE_LABEL}\"\n", + "\n", + "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n", + "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", + "print(\"GPU_DEVICES:\", GPU_DEVICES)\n", + "\n", + "if ENABLE_WANDB:\n", + " WANDB_MODE=\"online\"\n", + "else:\n", + " WANDB_MODE=\"disabled\"\n", + "\n", + "# Computing the notebook, and various paths\n", + "import os\n", + "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", + "CONFIG_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../\"))\n", + "PROJECT_DIR=os.path.abspath(os.path.join(CONFIG_DIR, \"../../../../\"))\n", + "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "INFERENCE_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "\n", + "# Get the notebook dir name\n", + "DIR_NAME=os.path.basename(NOTEBOOK_DIR)\n", + "\n", + "# Log names and dir\n", + "print(\"DIR_NAME:\", DIR_NAME)\n", + "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", + "print(\"INFERENCE_DIR:\", INFERENCE_DIR)\n", + "print(\"TRAINER_DIR:\", TRAINER_DIR)\n", + "print(\"PROJECT_DIR:\", PROJECT_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9a735016", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T18:15:10.875858Z", + "iopub.status.busy": "2023-09-06T18:15:10.875465Z", + "iopub.status.idle": "2023-09-06T18:15:26.773342Z", + "shell.execute_reply": "2023-09-06T18:15:26.772573Z" + }, + "papermill": { + "duration": 15.904517, + "end_time": "2023-09-06T18:15:26.775109", + "exception": false, + "start_time": "2023-09-06T18:15:10.870592", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2023-09-06 18:15:10-- https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-enwiki-4k.pth\r\n", + "Resolving huggingface.co (huggingface.co)... 13.33.33.110, 13.33.33.20, 13.33.33.55, ...\r\n", + "Connecting to huggingface.co (huggingface.co)|13.33.33.110|:443... connected.\r\n", + "HTTP request sent, awaiting response... " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "302 Found\r\n", + "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/fcd2c54e435c74dc2a43bd3bbde6594de9c6937156caf9f72a77137ed3d49539?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L12-D2048-E0_1-enwiki-4k.pth%3B+filename%3D%22v5r3-L12-D2048-E0_1-enwiki-4k.pth%22%3B&Expires=1694283311&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDI4MzMxMX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkL2ZjZDJjNTRlNDM1Yzc0ZGMyYTQzYmQzYmJkZTY1OTRkZTljNjkzNzE1NmNhZjlmNzJhNzcxMzdlZDNkNDk1Mzk%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=umgcHAKKVfM%7Elnvkc01zRnwcLHn3wrQfHfl-f-B8cnLRauI-kG63DbJ3BWLUFwsnUXKFrGoEYt8IW3AjE2J9QanT4tt1Zh34ojm5pdkTt4PvrIoX0iCwHRRIJGgV9h%7EF%7EMpsuweAJiHAbk61U4GSdt3fnpVaAKUKKa-VNDcmS3LwTuOx3gQgTqbTc-9ZMz14QcAVZV%7EgGZO5D1Owr0g0db9eatciOvhG7%7EnN%7ES%7EQIhVVZENXFPv0Ej8Jr11N0lmHdGU%7EBXm0fqUL1lCSaAEwCoIF%7EGrG2gtP049PGM9tapGgm6-4y4HbfsNVIxH-iRn-c2lvkjCpUSIOpMzce6wNvA__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n", + "--2023-09-06 18:15:11-- https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/fcd2c54e435c74dc2a43bd3bbde6594de9c6937156caf9f72a77137ed3d49539?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L12-D2048-E0_1-enwiki-4k.pth%3B+filename%3D%22v5r3-L12-D2048-E0_1-enwiki-4k.pth%22%3B&Expires=1694283311&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDI4MzMxMX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkL2ZjZDJjNTRlNDM1Yzc0ZGMyYTQzYmQzYmJkZTY1OTRkZTljNjkzNzE1NmNhZjlmNzJhNzcxMzdlZDNkNDk1Mzk%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=umgcHAKKVfM%7Elnvkc01zRnwcLHn3wrQfHfl-f-B8cnLRauI-kG63DbJ3BWLUFwsnUXKFrGoEYt8IW3AjE2J9QanT4tt1Zh34ojm5pdkTt4PvrIoX0iCwHRRIJGgV9h%7EF%7EMpsuweAJiHAbk61U4GSdt3fnpVaAKUKKa-VNDcmS3LwTuOx3gQgTqbTc-9ZMz14QcAVZV%7EgGZO5D1Owr0g0db9eatciOvhG7%7EnN%7ES%7EQIhVVZENXFPv0Ej8Jr11N0lmHdGU%7EBXm0fqUL1lCSaAEwCoIF%7EGrG2gtP049PGM9tapGgm6-4y4HbfsNVIxH-iRn-c2lvkjCpUSIOpMzce6wNvA__&Key-Pair-Id=KVTP0A1DKRTAX\r\n", + "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18.155.68.128, 18.155.68.73, 18.155.68.98, ...\r\n", + "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|18.155.68.128|:443... connected.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "HTTP request sent, awaiting response... 200 OK\r\n", + "Length: 1721187013 (1.6G) [binary/octet-stream]\r\n", + "Saving to: ‘v5r3-L12-D2048-E0_1-enwiki-4k.pth’\r\n", + "\r\n", + "\r", + " v5r3-L12- 0%[ ] 0 --.-KB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D 1%[ ] 21.14M 106MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2 2%[ ] 43.53M 109MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D20 4%[ ] 65.91M 110MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D204 5%[> ] 88.31M 110MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048 6%[> ] 110.72M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048- 8%[> ] 133.11M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048-E 9%[> ] 155.46M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048-E0 10%[=> ] 177.86M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048-E0_ 12%[=> ] 200.27M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L12-D2048-E0_1 13%[=> ] 222.66M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5r3-L12-D2048-E0_1- 14%[=> ] 245.07M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "r3-L12-D2048-E0_1-e 16%[==> ] 267.48M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "3-L12-D2048-E0_1-en 17%[==> ] 289.89M 111MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-L12-D2048-E0_1-enw 19%[==> ] 312.30M 112MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "L12-D2048-E0_1-enwi 20%[===> ] 334.71M 112MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "12-D2048-E0_1-enwik 21%[===> ] 357.11M 112MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2-D2048-E0_1-enwiki 23%[===> ] 379.51M 112MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D2048-E0_1-enwiki- 24%[===> ] 401.92M 112MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D2048-E0_1-enwiki-4 25%[====> ] 424.32M 112MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2048-E0_1-enwiki-4k 27%[====> ] 446.72M 112MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "048-E0_1-enwiki-4k. 28%[====> ] 469.12M 112MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "48-E0_1-enwiki-4k.p 29%[====> ] 491.52M 112MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "8-E0_1-enwiki-4k.pt 31%[=====> ] 513.93M 112MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-E0_1-enwiki-4k.pth 32%[=====> ] 536.33M 112MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "E0_1-enwiki-4k.pth 34%[=====> ] 558.73M 112MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0_1-enwiki-4k.pth 35%[======> ] 581.14M 112MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "_1-enwiki-4k.pth 36%[======> ] 602.65M 112MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "1-enwiki-4k.pth 38%[======> ] 624.87M 112MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-enwiki-4k.pth 39%[======> ] 647.26M 112MB/s eta 10s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "enwiki-4k.pth 40%[=======> ] 669.68M 112MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "nwiki-4k.pth 42%[=======> ] 692.08M 112MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "wiki-4k.pth 43%[=======> ] 714.45M 112MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "iki-4k.pth 44%[=======> ] 736.88M 112MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "ki-4k.pth 46%[========> ] 759.32M 112MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "i-4k.pth 47%[========> ] 781.71M 112MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-4k.pth 48%[========> ] 804.12M 112MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "4k.pth 50%[=========> ] 826.52M 112MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "k.pth 51%[=========> ] 848.92M 112MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + ".pth 53%[=========> ] 871.34M 112MB/s eta 8s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "pth 54%[=========> ] 893.74M 112MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "th 55%[==========> ] 916.15M 112MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "h 57%[==========> ] 938.55M 112MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " 58%[==========> ] 960.96M 112MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v 59%[==========> ] 983.35M 112MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5 61%[===========> ] 1006M 112MB/s eta 6s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r 62%[===========> ] 1.00G 112MB/s eta 6s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3 64%[===========> ] 1.03G 112MB/s eta 6s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3- 65%[============> ] 1.05G 112MB/s eta 6s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L 66%[============> ] 1.07G 112MB/s eta 6s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L1 68%[============> ] 1.09G 112MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12 68%[============> ] 1.09G 105MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12- 69%[============> ] 1.11G 104MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D 70%[=============> ] 1.13G 105MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2 72%[=============> ] 1.16G 105MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D20 73%[=============> ] 1.18G 104MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D204 74%[=============> ] 1.20G 105MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048 76%[==============> ] 1.22G 105MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048- 77%[==============> ] 1.24G 104MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048-E 79%[==============> ] 1.27G 105MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048-E0 80%[===============> ] 1.29G 105MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L12-D2048-E0_ 81%[===============> ] 1.31G 104MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L12-D2048-E0_1 83%[===============> ] 1.33G 105MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5r3-L12-D2048-E0_1- 84%[===============> ] 1.35G 105MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "r3-L12-D2048-E0_1-e 85%[================> ] 1.38G 104MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "3-L12-D2048-E0_1-en 87%[================> ] 1.40G 103MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-L12-D2048-E0_1-enw 88%[================> ] 1.42G 110MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "L12-D2048-E0_1-enwi 89%[================> ] 1.44G 111MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "12-D2048-E0_1-enwik 91%[=================> ] 1.46G 111MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2-D2048-E0_1-enwiki 92%[=================> ] 1.48G 110MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D2048-E0_1-enwiki- 93%[=================> ] 1.50G 110MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D2048-E0_1-enwiki-4 95%[==================> ] 1.52G 110MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2048-E0_1-enwiki-4k 96%[==================> ] 1.55G 110MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "048-E0_1-enwiki-4k. 97%[==================> ] 1.57G 110MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "48-E0_1-enwiki-4k.p 99%[==================> ] 1.59G 110MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L12-D2048-E0_1 100%[===================>] 1.60G 110MB/s in 15s \r\n", + "\r\n", + "2023-09-06 18:15:26 (110 MB/s) - ‘v5r3-L12-D2048-E0_1-enwiki-4k.pth’ saved [1721187013/1721187013]\r\n", + "\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 1.6G\r\n", + "drwxr-xr-x 2 root root 3 Sep 6 18:15 .\r\n", + "drwxr-xr-x 20 root root 24 Sep 6 18:15 ..\r\n", + "-rw-r--r-- 1 root root 1.7G Sep 6 15:04 v5r3-L12-D2048-E0_1-enwiki-4k.pth\r\n" + ] + } + ], + "source": [ + "# Download the model directly (stop gap till HF sync issues is resolved)\n", + "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n", + " wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/{DIR_NAME}/{FILENAME_PREFIX}-enwiki-4k.pth\"\n", + "\n", + "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n", + " ls -alh ." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2b6d6b9e", + "metadata": { + "papermill": { + "duration": 0.005279, + "end_time": "2023-09-06T18:15:26.791307", + "exception": false, + "start_time": "2023-09-06T18:15:26.786028", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Enwiki Stage 2 : Basic Instruct Tuning" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ec611ca0", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T18:15:26.803278Z", + "iopub.status.busy": "2023-09-06T18:15:26.803020Z", + "iopub.status.idle": "2023-09-06T18:15:34.152162Z", + "shell.execute_reply": "2023-09-06T18:15:34.151364Z" + }, + "papermill": { + "duration": 7.357428, + "end_time": "2023-09-06T18:15:34.154153", + "exception": false, + "start_time": "2023-09-06T18:15:26.796725", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Saving the dataset (0/1 shards): 0%| | 0/14932 [00:00=12.1), as this is known to have freeze issues\r\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\r\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\r\n", + "#\r\n", + "\r\n", + "[RWKV.model] Configuring optimizer with\r\n", + " - lr_init: 4.000e-04 (0.0004)\r\n", + " - lr_final: 3.000e-04 (0.0003)\r\n", + "\r\n", + "Using /root/.cache/torch_extensions/py310_cu118 as PyTorch extensions root...\r\n", + "Detected CUDA files, patching ldflags\r\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu118/fused_adam/build.ninja...\r\n", + "Building extension module fused_adam...\r\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ninja: no work to do.\r\n", + "Loading extension module fused_adam...\r\n", + "Time to load fused_adam op: 0.059059858322143555 seconds\r\n", + "Loading `train_dataloader` to estimate number of stepping batches.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Rank: 0 partition count [1, 1] and sizes[(860549120, False), (768, False)] \r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r\n", + " | Name | Type | Params\r\n", + "--------------------------------------\r\n", + "0 | emb | Embedding | 102 M \r\n", + "1 | blocks | ModuleList | 654 M \r\n", + "2 | ln_out | LayerNorm | 4.1 K \r\n", + "3 | head | Linear | 102 M \r\n", + "--------------------------------------\r\n", + "860 M Trainable params\r\n", + "0 Non-trainable params\r\n", + "860 M Total params\r\n", + "3,442.200 Total estimated model params size (MB)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Training: 0it [00:00, ?it/s]\r", + "Training: 0%| | 0/14932 [00:00\r\n", + " cli_main()\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 233, in cli_main\r\n", + " LightningCLI(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 353, in __init__\r\n", + " self._run_subcommand(self.subcommand)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 642, in _run_subcommand\r\n", + " fn(**fn_kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 529, in fit\r\n", + " call._call_and_handle_interrupt(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 41, in _call_and_handle_interrupt\r\n", + " return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/strategies/launchers/subprocess_script.py\", line 91, in launch\r\n", + " return function(*args, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 568, in _fit_impl\r\n", + " self._run(model, ckpt_path=ckpt_path)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 925, in _run\r\n", + " self._data_connector.prepare_data()\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py\", line 94, in prepare_data\r\n", + " call._call_lightning_datamodule_hook(trainer, \"prepare_data\")\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 164, in _call_lightning_datamodule_hook\r\n", + " return fn(*args, **kwargs)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 542, in prepare_data\r\n", + " prepare_data_static(**self._init_locals)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 101, in prepare_data_static\r\n", + " src_dataset = load_dataset(**load_dataset_params)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 2112, in load_dataset\r\n", + " builder_instance = load_dataset_builder(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 1798, in load_dataset_builder\r\n", + " dataset_module = dataset_module_factory(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 1413, in dataset_module_factory\r\n", + " ).get_module()\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 948, in get_module\r\n", + " patterns = sanitize_patterns(self.data_files) if self.data_files is not None else get_data_patterns(base_path)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/data_files.py\", line 459, in get_data_patterns\r\n", + " raise EmptyDatasetError(f\"The directory at {base_path} doesn't contain any data files\") from None\r\n", + "datasets.data_files.EmptyDatasetError: The directory at /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/dataset doesn't contain any data files\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5r3-L12-D2048-E0.1 - Mem-Instruct (train-ctx=512, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/t4ttsqx6\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjkzMjg5ODA3/version_details/v31\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230906_183907-t4ttsqx6/logs\u001b[0m\r\n" + ] + } + ], + "source": [ + "# Start the finetune model training\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{CONFIG_DIR}/config-mem-instruct.yaml\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Instruct (train-ctx=512, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-mem-instruct/\" \\\n", + " --model.load_model=\"../model/{FILENAME_PREFIX}-enwiki-instruct.pth\" \\\n", + " --model.ctx_len=512 \\\n", + " --model.bptt_learning_range=1" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "d08eb257", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T18:39:26.893633Z", + "iopub.status.busy": "2023-09-06T18:39:26.893337Z", + "iopub.status.idle": "2023-09-06T18:39:29.382332Z", + "shell.execute_reply": "2023-09-06T18:39:29.381567Z" + }, + "papermill": { + "duration": 3.132835, + "end_time": "2023-09-06T18:39:29.384061", + "exception": false, + "start_time": "2023-09-06T18:39:26.251226", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-09-06 18:39:28,513] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in \r\n", + " convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n", + " state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n", + " raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n", + "ValueError: Unable to find 'latest' file at ../checkpoint/v5r3-L12-D2048-E0_1-mem-instruct/last.ckpt/latest\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ls: cannot access '../model/v5r3-L12-D2048-E0_1-mem-instruct.pth': No such file or directory\r\n" + ] + } + ], + "source": [ + "# Lets export the model from the checkpoint\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 export_checkpoint.py \\\n", + " \"../checkpoint/{FILENAME_PREFIX}-mem-instruct/last.ckpt\" \\\n", + " \"../model/{FILENAME_PREFIX}-mem-instruct.pth\" \"bf16\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-mem-instruct.pth\"" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "39060592", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T18:39:30.624854Z", + "iopub.status.busy": "2023-09-06T18:39:30.624554Z", + "iopub.status.idle": "2023-09-06T18:39:30.862069Z", + "shell.execute_reply": "2023-09-06T18:39:30.861310Z" + }, + "papermill": { + "duration": 0.873983, + "end_time": "2023-09-06T18:39:30.863701", + "exception": false, + "start_time": "2023-09-06T18:39:29.989718", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/eval_v5_memory_guided.py': [Errno 2] No such file or directory\r\n" + ] + } + ], + "source": [ + "# Lets do a quick memory test\n", + "!python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-instruct.pth\"" + ] + }, + { + "cell_type": "markdown", + "id": "9ed8fd21", + "metadata": { + "papermill": { + "duration": 0.601874, + "end_time": "2023-09-06T18:39:32.106539", + "exception": false, + "start_time": "2023-09-06T18:39:31.504665", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Tune 2 : Low ctx size (512), memory training\n", + "\n", + "- Tune 2: Low ctx size (512), Training with instruction & input masked. This forces the actual memory training on the output tokens." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "04256ca3", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T18:39:33.385937Z", + "iopub.status.busy": "2023-09-06T18:39:33.385635Z", + "iopub.status.idle": "2023-09-06T18:39:33.431947Z", + "shell.execute_reply": "2023-09-06T18:39:33.431311Z" + }, + "papermill": { + "duration": 0.688742, + "end_time": "2023-09-06T18:39:33.433721", + "exception": false, + "start_time": "2023-09-06T18:39:32.744979", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "bash: line 7: cd: {CONFIG_DIR}: No such file or directory\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Generating word reptition dataset ##\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Done ##\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 10K\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "drwxr-xr-x 2 root root 2 Sep 6 18:39 .\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "drwxr-xr-x 6 root root 11 Sep 6 18:39 ..\n" + ] + } + ], + "source": [ + "%%script bash\n", + "\n", + "########################################\n", + "# Generate the required jsonl dataset\n", + "########################################\n", + "\n", + "# Go to config dir\n", + "cd \"{CONFIG_DIR}\"\n", + "\n", + "# Reset the dataset dir\n", + "mkdir -p ../dataset\n", + "rm -rf ../dataset/*.jsonl\n", + "\n", + "# Generate the various datasets\n", + "echo \"## Generating word reptition dataset ##\"\n", + "\n", + "#\n", + "# We switch over to fully masked instruct+input, to properly learn the memorization task\n", + "#\n", + "python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-2-count.jsonl 2 5000 &\n", + "for i in {5..95..5} \n", + "do\n", + " python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 5000 & \n", + "done\n", + "python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-100-count.jsonl 100 5000 &\n", + "python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-200-count.jsonl 200 5000 &\n", + "\n", + "#\n", + "# We mixin the shuffled word list, so that we ensure all words / tokens are learned\n", + "# however this might intrduce an exclusion bias (if seen this word, never repeat it), \n", + "# so we limit the mixture of this data samples\n", + "#\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-10-count.jsonl 10 20 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-15-count.jsonl 15 20 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-25-count.jsonl 25 30 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-50-count.jsonl 50 50 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-75-count.jsonl 75 50 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-100-count.jsonl 100 50 &\n", + "python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-200-count.jsonl 200 50 &\n", + "\n", + "wait\n", + "echo \"## Done ##\"\n", + "\n", + "ls -alh ../dataset/" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "1fc0cef1", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T18:39:34.676127Z", + "iopub.status.busy": "2023-09-06T18:39:34.675939Z", + "iopub.status.idle": "2023-09-06T18:39:53.301947Z", + "shell.execute_reply": "2023-09-06T18:39:53.301048Z" + }, + "papermill": { + "duration": 19.230409, + "end_time": "2023-09-06T18:39:53.303981", + "exception": false, + "start_time": "2023-09-06T18:39:34.073572", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-09-06 18:39:37,542] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L12-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L12-D2048-E0_1-mem-ctx-512/', '--model.lr_init=5e-4', '--model.lr_final=4e-4', '--data.max_token_size=512', '--model.ctx_len=512', '--model.bptt_learning_range=1', '--model.load_model=../model/v5r3-L12-D2048-E0_1-mem-instruct.pth'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L12-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L12-D2048-E0_1-mem-ctx-512/', '--model.lr_init=5e-4', '--model.lr_final=4e-4', '--data.max_token_size=512', '--model.ctx_len=512', '--model.bptt_learning_range=1', '--model.load_model=../model/v5r3-L12-D2048-E0_1-mem-instruct.pth'].\r\n", + " rank_zero_warn(\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 2825309834\r\n", + " rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n", + "Global seed set to 2825309834\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.9\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20230906_183940-f9fnknh5\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mv5r3-L12-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/f9fnknh5\u001b[0m\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 254, in \r\n", + " cli_main()\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 233, in cli_main\r\n", + " LightningCLI(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 350, in __init__\r\n", + " self.instantiate_classes()\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 499, in instantiate_classes\r\n", + " self.config_init = self.parser.instantiate_classes(self.config)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n", + " cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1130, in instantiate_classes\r\n", + " cfg[subcommand] = subparser.instantiate_classes(cfg[subcommand], instantiate_groups=instantiate_groups)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n", + " cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1124, in instantiate_classes\r\n", + " component.instantiate_class(component, cfg)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_signatures.py\", line 561, in group_instantiate_class\r\n", + " parent[key] = group.group_class(**value)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 559, in __init__\r\n", + " raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n", + "ValueError: load_model file '../model/v5r3-L12-D2048-E0_1-mem-instruct.pth' does not exist\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5r3-L12-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/f9fnknh5\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjkzMjg5ODA3/version_details/v32\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230906_183940-f9fnknh5/logs\u001b[0m\r\n" + ] + } + ], + "source": [ + "# Start the finetune model training\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{CONFIG_DIR}/config-mem-template.yaml\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Tune ctx-512 (train-ctx=512, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-mem-ctx-512/\" \\\n", + " --model.lr_init=5e-4 \\\n", + " --model.lr_final=4e-4 \\\n", + " --data.max_token_size=512 \\\n", + " --model.ctx_len=512 \\\n", + " --model.bptt_learning_range=1 \\\n", + " --model.load_model=\"../model/{FILENAME_PREFIX}-mem-instruct.pth\"" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "f9b70eea", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T18:39:54.579000Z", + "iopub.status.busy": "2023-09-06T18:39:54.578746Z", + "iopub.status.idle": "2023-09-06T18:39:57.009190Z", + "shell.execute_reply": "2023-09-06T18:39:57.008417Z" + }, + "papermill": { + "duration": 3.068353, + "end_time": "2023-09-06T18:39:57.010860", + "exception": false, + "start_time": "2023-09-06T18:39:53.942507", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-09-06 18:39:56,146] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in \r\n", + " convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n", + " state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n", + " raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n", + "ValueError: Unable to find 'latest' file at ../checkpoint/v5r3-L12-D2048-E0_1-mem-ctx-512/last.ckpt/latest\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ls: cannot access '../model/v5r3-L12-D2048-E0_1-mem-ctx-512.pth': No such file or directory\r\n" + ] + } + ], + "source": [ + "# Lets export the model from the checkpoint\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 export_checkpoint.py \\\n", + " \"../checkpoint/{FILENAME_PREFIX}-mem-ctx-512/last.ckpt\" \\\n", + " \"../model/{FILENAME_PREFIX}-mem-ctx-512.pth\" \"bf16\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-mem-ctx-512.pth\"" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "01fca15c", + "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T18:39:58.256313Z", + "iopub.status.busy": "2023-09-06T18:39:58.256022Z", + "iopub.status.idle": "2023-09-06T18:39:58.488872Z", + "shell.execute_reply": "2023-09-06T18:39:58.488054Z" + }, + "papermill": { + "duration": 0.874819, + "end_time": "2023-09-06T18:39:58.490725", + "exception": false, + "start_time": "2023-09-06T18:39:57.615906", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/eval_v5_memory_guided.py': [Errno 2] No such file or directory\r\n" + ] + } + ], + "source": [ + "# Lets do a quick memory test\n", + "!python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-512.pth\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "papermill": { + "default_parameters": {}, + "duration": 1492.297607, + "end_time": "2023-09-06T18:39:59.213287", + "environment_variables": {}, + "exception": null, + "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb", + "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb", + "parameters": {}, + "start_time": "2023-09-06T18:15:06.915680", + "version": "2.4.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-enwiki-instruct.pth b/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-enwiki-instruct.pth index 5d66a986e13f105e9d1ef1b349003fdf4bd0f4eb..42e0286bc0b229cbc49e0036efc88ed43b44e67e 100644 --- a/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-enwiki-instruct.pth +++ b/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-enwiki-instruct.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cf36cb2931ec033f98652643fa297578e6492fdde20223c63fc118f315ec34da +oid sha256:48e76cb4f838aac276f7eaaa0eb325338060592ab995b8cbf2bb0e0b44170c2e size 1721188709 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-mem-ctx-512.pth b/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-mem-ctx-512.pth deleted file mode 100644 index 92c2877599b885c7d94dd39bba81674afa587188..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-mem-ctx-512.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:428165f24369bec63746c64c9da6816dc26c17c18a80be1fff372da78294329c -size 1721187621 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-mem-instruct.pth b/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-mem-instruct.pth deleted file mode 100644 index 0056aaaf281f52fecf535c5252960e0e5f123453..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-mem-instruct.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:106e6576d2bbb4c31571bf79e574b3fb8233217a6e8a8739b4d8d75de77e73a1 -size 1721187893 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/part1.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/part1.ipynb index 14ee9dd00dac7940061b9d8cc3baea15d28e8b03..7813b2792bd419cca4a8009d8e0283f3454e6922 100644 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/part1.ipynb +++ b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/part1.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c1ad9f654a7f6ed10dcf0b15fce28a0b191ce8f52bffde64090163e063ac19ab -size 24177949 +oid sha256:562fdf69a4267c753b425812f407e66acd1a0e0bd0dc29eef451d5f9af9193d1 +size 24740731 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage2.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage2.ipynb deleted file mode 100644 index 31c7d50e0c7c5197bc24cd44c364ab59a245d4e8..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage2.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:04b893c23868d4438d1733d8b85afe79f3065c7074b4f3d1ebc27a0b29b350a0 -size 52755279 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb deleted file mode 100644 index 7e206cc12ec5ccf27fa871d361750dce04655d13..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0439ce7fb6866af36cb53bbddaf6a1ed49656c85a84d6a2aabd6754b30fa2109 -size 61159745 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage4.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage4.ipynb deleted file mode 100644 index 7ffd7172b586e0b8857d504b71f3808861d311aa..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage4.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3855c5ed19072f2ecaec4294c3945a3290e692d15c0aa7351c7d4917404fbf65 -size 38208798 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage5.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage5.ipynb deleted file mode 100644 index 2b40078b2fa26fff7d07cf99fa6ea18a8e9a50b1..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage5.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e318ecc4d20d89232f8a0677c54f8489d602cb3d66632cadb29c028c917eb00e -size 30322339 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-enwiki-4k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-enwiki-4k.pth index 6b524ff4d5b17f08354f3346a9320b826b489ff9..ce2745ae53706070d3d5b763f87844aac4ce17d5 100644 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-enwiki-4k.pth +++ b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-enwiki-4k.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a98bb4421b50083bfbb0e4233838456ff812373a85d2bf87d23cdb8aa6b8d702 +oid sha256:7eb7abfda2e4cfb2a961ba4d52564f9b330830ba1a836966556e28753468ea1e size 1066536937 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-enwiki-instruct.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-enwiki-instruct.pth index ccb59159ba5b615cb7e3a06a2a6bdc929502ce52..0c8960eb4ddd84ee12868e217cc7928f7819eb12 100644 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-enwiki-instruct.pth +++ b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-enwiki-instruct.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0a83bdbbf6d686bfa77529fc9bbde3a91fc8d182e1dc33ce8d18f2a0abbe2576 +oid sha256:ab48651fdc2dfda5e4cf92a59fb9ca1d0be027063c7c26d545bdd92b06e4846c size 1066537777 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-1k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-1k.pth deleted file mode 100644 index 8af2cc985147220953dab23cbe6635297e1208ab..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-1k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cf3a15d56db013d138bed6780d58c4362ca96b3ef98fb98e2d1444f325c582b5 -size 1066537077 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-2k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-2k.pth deleted file mode 100644 index ec338ad0a412b427fe4cc7115959eb96face9f73..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-2k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:76cc2d79013781f18c6507848a138150084d37c3aae0f20145e7e5854bcabb99 -size 1066537077 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-4k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-4k.pth deleted file mode 100644 index ae6d693b3aafaef43037882c09d0bebc48f35c22..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-4k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2f1b8811c3f2f8c12c564edb973392bb486c9d674152f3eb769ac206c20bcfc0 -size 1066537077 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-512.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-512.pth index baae90af34d9f56b91e9010b43112a84b2184fb0..fef48a90cc9cbeda1419c0affb4e305af1f11f0e 100644 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-512.pth +++ b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-512.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:92bee66e66bfcba8c592c785b63cb88f4e4889d78d7cdc49c33bd53bf0e3c31f +oid sha256:95f9312c1cdf592ccbfacc32bafa627a5543fca492cdee40681636ae5c3cc360 size 1066537217 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-8k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-8k.pth deleted file mode 100644 index 18bf2327f7771b99c67b0d997cbdf1ba8f34902d..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-8k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:db7ab09a447f96d9fcdbee4761bd35f2c3bce9868d3136959ed601a8e478083c -size 1066537077 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-instruct.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-instruct.pth index 063c157afbc559f19f9a7a0eb7d82adfc2eb06ce..cd14f2e7eda0882008bec8704554e5ddabab3b80 100644 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-instruct.pth +++ b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-instruct.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d9ec28149b79524846c14ec90b0a206a3529abb584575553320f2427be475225 +oid sha256:75de255d9811b4c32d41d9d5edd5fc38b8f19b1ee7f2ac336e30c26aa09d9686 size 1066537357 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-neox-init.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-neox-init.pth index 1436a34a06eb09d5b2334edf591409a6c7681d4c..ab0a70373f4499e6d8ceca3ce6107ebcf0ef2f82 100644 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-neox-init.pth +++ b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-neox-init.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:98ff68af85a7db87d6e8aeb68d8e36d403dc41e947e090dcbc8e13dbeeb50406 +oid sha256:ea0b14c5aafb10ad2506806e44a34c09ad518076fc485ab5dfec004219cf6db5 size 1066537497 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/part1.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/part1.ipynb deleted file mode 100644 index 6518411075f0671db66215e751dcfb84e7744907..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/part1.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b0977926c2d6b24d07dbdbaff2712954f647d5b697863bc918c4ffc0f431f572 -size 24384207 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage2.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage2.ipynb deleted file mode 100644 index 4db294f20953a07e8a6d96806c7609973258cd21..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage2.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:97d1563b2840253fdb03cb024ff0447bb295fdc9a3c255c4c39bc01eb46720a0 -size 48457692 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb deleted file mode 100644 index 67972444f9ed3d06fa1d595845dd1bbdebb54e5f..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:46167bb1edba84bd184533d3d424a8c4d9ed5a15d8b93474596453f235462f13 -size 58866814 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage4.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage4.ipynb deleted file mode 100644 index d2df3a3f63985ebd40d91463248c0db8fe7ba56b..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage4.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:17f586e9a94c3fbf463e84c518ca7712b6fd539d3c8dda8e4893115a1298c8d4 -size 37212168 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage5.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage5.ipynb deleted file mode 100644 index 4c2298987669d56cd07135b0ec4f9da50c75303c..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage5.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8b5c4dec751996e61882229f30bba0e005ce01e44319a1b75011aacad7575fc0 -size 30004883 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-enwiki-4k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-enwiki-4k.pth deleted file mode 100644 index a0c87934877f4b602fb1837f3622e834c68df8f8..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-enwiki-4k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2cd66a2944fa9937d02db9ca207d45532d46f6bea1b1d8b0110da9070284e336 -size 1537632233 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-enwiki-instruct.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-enwiki-instruct.pth deleted file mode 100644 index 9d678fc0b0c7e5c727a676497397f4fa3acbdd6c..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-enwiki-instruct.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:034212556c2d5dd6eca8d12ee3ec0daf4aad7cddeab006934130e7fdab4a2b34 -size 1537633073 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-1k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-1k.pth deleted file mode 100644 index b525c72fd31827ffe8aadfecb883cf5b3cceb8f6..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-1k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0d91f2c3f5b96e9d249342bdead58f58d3b1f5ab7c92401a50ab4e5170ae2636 -size 1537632373 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-2k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-2k.pth deleted file mode 100644 index 4aade76d3bbd951a784adce9596a631a5c1640e8..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-2k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a00e8cbc2222bb853dc5f83fe3d6f4c43f4b970cc554be37fb937d476e3eaf88 -size 1537632373 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-4k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-4k.pth deleted file mode 100644 index 4b8c0bd7e1c67c46e2676c3c9141d7c46019114d..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-4k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4cc22ca95e9a2054534ef8fcce63cf2d0ce65916b39318ce650debd41adee876 -size 1537632373 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-512.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-512.pth deleted file mode 100644 index 60365f13edf8b2534dc2ec815b0cc8e327a79570..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-512.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:facd3a8913710e7c17719547c55dcde02826ce2d592626c0339e42b394858498 -size 1537632513 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-8k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-8k.pth deleted file mode 100644 index 242c0b4bb345f3db30feafae94d0928d82498501..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-8k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:28d84ce479bb5ca4e3a226f9eb03266c344a398bfff8f420af4ae5598f23fe86 -size 1537632373 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-instruct.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-instruct.pth deleted file mode 100644 index a0dce592c711e7aa9e3268e4856ed36b1995db1e..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-instruct.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4da514e31111bd781ef43fd38c278c49c8e3228c9546dacebc3aaa1710d33753 -size 1537632653 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-neox-init.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-neox-init.pth deleted file mode 100644 index 78b68d75077f38b3b22e07195a4ab7b0e4f4bfc2..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-neox-init.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ba3acd62e52a17868b280dc1c746498810014b39eb1282c952e45a1dd1bdc058 -size 1537632793 diff --git a/manual-uploads/3B-code/3B-CM-v5r4-L48-D2048-E0_1-enwiki-4k.pth b/manual-uploads/3B-code/3B-CM-v5r4-L48-D2048-E0_1-enwiki-4k.pth deleted file mode 100644 index 1bf1e3dc51ade1b71678c5f59d05067dfad6a652..0000000000000000000000000000000000000000 --- a/manual-uploads/3B-code/3B-CM-v5r4-L48-D2048-E0_1-enwiki-4k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:327bb880f90f80ba9d119f0ca43fb108994e02c5935ec0384a619b092fc2f341 -size 5774098255 diff --git a/manual-uploads/3B-code/_anchor b/manual-uploads/3B-code/_anchor deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-1B5-world.pth b/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-1B5-world.pth deleted file mode 100644 index 85b2cc712939163ad6ffc54bc460941d4b9d38cd..0000000000000000000000000000000000000000 --- a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-1B5-world.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1844acad3a36721d4427efa928dd7bbe84bff6ec98ceb310db33987106672a8d -size 3155687506 diff --git a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-3B-world.pth b/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-3B-world.pth deleted file mode 100644 index fbcd575d03307f44cf24f2855f831273aa31a819..0000000000000000000000000000000000000000 --- a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-3B-world.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3a8289e17931e0d3ed2cc213eaa66e1ce12f005c69030a9afb38b33987f8877b -size 6126236920 diff --git a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-7B-world.pth b/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-7B-world.pth deleted file mode 100644 index ee19bc64b756b3f670b76b0754f8c9bb20960aab..0000000000000000000000000000000000000000 --- a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-7B-world.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3faf38a7820276bc2dc36d27259d7067c56aa228ec5dd72f743dfc9d72ff3988 -size 15036330880 diff --git a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-1B5-world.pth b/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-1B5-world.pth deleted file mode 100644 index 414398090acb93c53e824c8b21fd9a840dc301e6..0000000000000000000000000000000000000000 --- a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-1B5-world.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0dac80051873f2fc1bb4645d7986330b49976520ddad6574ab4ad4d3dc3bdc15 -size 3155687506 diff --git a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-3B-world.pth b/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-3B-world.pth deleted file mode 100644 index 8e7c0e5c5233a0b493e06a7d261ab72a14e68d71..0000000000000000000000000000000000000000 --- a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-3B-world.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:11238a58929e3cb5c4cfe2777e555c66f01a09e391361e6cc30143eb5360e1ac -size 6126236920 diff --git a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-7B-world.pth b/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-7B-world.pth deleted file mode 100644 index d46afdf695f7e39687ceae0cafb215d175b58936..0000000000000000000000000000000000000000 --- a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-7B-world.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53aeea26eac9b77fdc82484533e98374cc06b08ab2084d7e7f062325b86a912b -size 15036330880 diff --git a/manual-uploads/RWKV-v5-memory-test/_anchor.txt b/manual-uploads/RWKV-v5-memory-test/_anchor.txt deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000