diff --git a/.gitattributes b/.gitattributes index b6d19782bcbc7fe236d0826bfe059c5aa9f6ba2a..fa3fa5a78241b9fd2bdb178c5d12d61f75996394 100644 --- a/.gitattributes +++ b/.gitattributes @@ -93,15 +93,3 @@ experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage2.ipynb filter=lfs experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage2.ipynb filter=lfs diff=lfs merge=lfs -text experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb filter=lfs diff=lfs merge=lfs -text experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage4.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage4.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage5.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage5.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-baseline.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-part1.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-baseline.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-overwrite-naive.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-train.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-expansion.ipynb filter=lfs diff=lfs merge=lfs -text -experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-baseline.ipynb filter=lfs diff=lfs merge=lfs -text diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-baseline-p2.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-baseline-p2.pth deleted file mode 100644 index df9e649b821c97a2e5912e7f73555d6ed6cb4133..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-baseline-p2.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c447bfd1844d0c3e536fb8824d029fd8b0e334e1368f807a4e85cd7099005130 -size 1721187285 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-baseline-p3.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-baseline-p3.pth deleted file mode 100644 index 7e510b454e51f3e1360458d4aa4bc8f97480caba..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-baseline-p3.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:70b243f59685c4df841f16343bc7ff6947a3125cec5dabf9035b28b65c04da0e -size 1721187285 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-enwiki-4k-p1.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-enwiki-4k-p1.pth deleted file mode 100644 index af865daa0e29c34250db338e077056e58b7bba5e..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-enwiki-4k-p1.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a57c278ed7e7e2f9d7f0436540674bfa5178adcd04c3154f5d92992e0602c55b -size 1721187621 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-neox-v5base-init.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-neox-v5base-init.pth deleted file mode 100644 index 613ae00e6b94d2f54a518f1072636f21e570c85e..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-E0_01-neox-v5base-init.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:06105d96413046fce0ec189b9c4685a813cfa7147300851c5d2afc7b5adbcb38 -size 1721189797 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-baseline.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-baseline.ipynb deleted file mode 100644 index 04f75fe75d5a993aa7050629019cd0e3cf72c508..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-baseline.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:31f61ce42e82d9a475446458ed015a190f16dd9b2b17bd67f4feedd9f72750ad -size 16577145 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-part1.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-part1.ipynb deleted file mode 100644 index 2caac2060cc81dbfc7e4840004960eeed06d0e29..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-part1.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b53c27ed2c20b9f1f690647a83c0fbe2ce09594518b9ec557f515a4f8b548f2b -size 15941299 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-baseline-p3.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-baseline-p3.pth deleted file mode 100644 index 2f736f75a1664aecb04c1e0fe217b71a77aecbf5..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-baseline-p3.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c380bcd4b861a8af263fd56dc6e183b9e06ba0bc8f9895c4dcd8a678b58296e8 -size 1721187621 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-layer-expansion-p1.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-layer-expansion-p1.pth deleted file mode 100644 index 7f96f25f0aac70ea5a9c88a5208d0c071bd9fee9..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-layer-expansion-p1.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:89f8caf661887bdba1897a10009f033331c552bfb763112e6da1b850d8ec3ff7 -size 1721189525 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-layer-expansion-p2.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-layer-expansion-p2.pth deleted file mode 100644 index 15f17b01a2826359ce6ac3f3bea9b310b2b596e3..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-layer-expansion-p2.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2680e091197e798686c97bdd2af0f6827f2b29c648cc1ae03f67d6f094859618 -size 1721189525 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-overwrite-naive-p1.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-overwrite-naive-p1.pth deleted file mode 100644 index 8bb49e3d9132afcd95cecfa46932131d2971c1e2..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-overwrite-naive-p1.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:35a5d7571d90160edc20ce95abfdbcb6109ad47eccdefe8051bd8f15d12bf326 -size 1721189525 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-overwrite-naive-p2.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-overwrite-naive-p2.pth deleted file mode 100644 index ac0f774b491b1af1dec3e65871dcb2618a295104..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-E0_01-overwrite-naive-p2.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f6b50bf05f191da87a6a17072d485d4059a4ded1335605e6b7bb8e9f2648d966 -size 1721189525 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb deleted file mode 100644 index ae3b85f4a8acf6c3001f445ade22f015a8d52327..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb +++ /dev/null @@ -1,2461 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "d3126ef2", - "metadata": { - "papermill": { - "duration": 0.004879, - "end_time": "2023-10-11T08:02:23.608034", - "exception": false, - "start_time": "2023-10-11T08:02:23.603155", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "# RWKV v5 multi-size training experiment\n", - "\n", - "**Note:** This project assumes you have the rwkv-infctx conda env setup" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "986070aa", - "metadata": { - "papermill": { - "duration": 0.002523, - "end_time": "2023-10-11T08:02:23.613605", - "exception": false, - "start_time": "2023-10-11T08:02:23.611082", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "# Basic Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "dc924c7f", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:02:23.620990Z", - "iopub.status.busy": "2023-10-11T08:02:23.620432Z", - "iopub.status.idle": "2023-10-11T08:02:24.379549Z", - "shell.execute_reply": "2023-10-11T08:02:24.378580Z" - }, - "papermill": { - "duration": 0.765369, - "end_time": "2023-10-11T08:02:24.381741", - "exception": false, - "start_time": "2023-10-11T08:02:23.616372", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# First lets setup the various directories, and init the model\n", - "!mkdir -p ../../../../model/\n", - "!mkdir -p ../../../../datapath/\n", - "!mkdir -p ../../../../checkpoint/" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "2bbc32ac", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:02:24.389788Z", - "iopub.status.busy": "2023-10-11T08:02:24.389227Z", - "iopub.status.idle": "2023-10-11T08:02:24.398441Z", - "shell.execute_reply": "2023-10-11T08:02:24.397578Z" - }, - "papermill": { - "duration": 0.015548, - "end_time": "2023-10-11T08:02:24.400362", - "exception": false, - "start_time": "2023-10-11T08:02:24.384814", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "DEEPSPEED_STRAT: deepspeed_stage_2_offload\n", - "ENABLE_WANDB: True\n", - "GPU_DEVICES: auto\n", - "NOTEBOOK_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/multi-size-train\n", - "INFERENCE_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", - "TRAINER_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n", - "PROJECT_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer\n" - ] - } - ], - "source": [ - "DEEPSPEED_STRAT=\"deepspeed_stage_2_offload\"\n", - "GPU_DEVICES=\"auto\"\n", - "ENABLE_WANDB=True\n", - "\n", - "EMBED_SCALE=0.01\n", - "EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(\".\", \"_\")\n", - "\n", - "EMBED_SIZE=2048\n", - "\n", - "WANDB_PREFIX=f\"[Multi-size] v5-L6+6-D{EMBED_SIZE}-E{EMBED_SCALE}\"\n", - "FILENAME_PREFIX=f\"v5-L6+6-D{EMBED_SIZE}-E{EMBED_SCALE_LABEL}\"\n", - "\n", - "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n", - "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", - "print(\"GPU_DEVICES:\", GPU_DEVICES)\n", - "\n", - "if ENABLE_WANDB:\n", - " WANDB_MODE=\"online\"\n", - "else:\n", - " WANDB_MODE=\"disabled\"\n", - "\n", - "# Computing the notebook, and various paths\n", - "import os\n", - "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", - "PROJECT_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../../../../\"))\n", - "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", - "INFERENCE_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", - "\n", - "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", - "print(\"INFERENCE_DIR:\", INFERENCE_DIR)\n", - "print(\"TRAINER_DIR:\", TRAINER_DIR)\n", - "print(\"PROJECT_DIR:\", PROJECT_DIR)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "ffa69634", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:02:24.408311Z", - "iopub.status.busy": "2023-10-11T08:02:24.407798Z", - "iopub.status.idle": "2023-10-11T08:03:19.634663Z", - "shell.execute_reply": "2023-10-11T08:03:19.633765Z" - }, - "papermill": { - "duration": 55.233419, - "end_time": "2023-10-11T08:03:19.636895", - "exception": false, - "start_time": "2023-10-11T08:02:24.403476", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2023-10-11 08:02:24-- https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/0600b94a58219f658326b4792ef5cd020e9d1a43/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2a.pth\r\n", - "Resolving huggingface.co (huggingface.co)... 18.154.227.87, 18.154.227.7, 18.154.227.69, ...\r\n", - "Connecting to huggingface.co (huggingface.co)|18.154.227.87|:443... connected.\r\n", - "HTTP request sent, awaiting response... 302 Found\r\n", - "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/2f52085cee9c3db4bb079dc44edf50b0a19c170bd92128e918e6203efef83cea?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L6-D2048-E0_01-split-2a.pth%3B+filename%3D%22v5-L6-D2048-E0_01-split-2a.pth%22%3B&Expires=1697270544&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NzI3MDU0NH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzJmNTIwODVjZWU5YzNkYjRiYjA3OWRjNDRlZGY1MGIwYTE5YzE3MGJkOTIxMjhlOTE4ZTYyMDNlZmVmODNjZWE%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=AW451jyDioqxesXvDVp%7EgfYV3uhgFTDwTn3SlZa-gk-yCDb7c-QR44rTm9sWCGSJjaa%7EvJvj9zLGUK7fvbr%7E%7EGQJgL2L%7Es9vkVPg8qs1k%7EtCh-MX%7E45bxo4CapTIo8fx4xLJ738Tks8uzpx3Sy9hWbfuGQFCUwBHzJXG5uGNRzPv87Zdfy4gIIAt0NytaC3bFmKZl4DbXLF4%7EtVWXED7H3NAlBvGETdhjzK5Qr0FLZB2vqC1LQpPTexdTH-ETkPEIQpXRBV-JctzaKBfI1Da-tGpt4JdPlhyPIu1kaNtX13yTibuBrT-mDOy6OVJZ9Zsj%7EHdVUtDrdp-I01dhylHpQ__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n", - "--2023-10-11 08:02:24-- https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/2f52085cee9c3db4bb079dc44edf50b0a19c170bd92128e918e6203efef83cea?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L6-D2048-E0_01-split-2a.pth%3B+filename%3D%22v5-L6-D2048-E0_01-split-2a.pth%22%3B&Expires=1697270544&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NzI3MDU0NH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzJmNTIwODVjZWU5YzNkYjRiYjA3OWRjNDRlZGY1MGIwYTE5YzE3MGJkOTIxMjhlOTE4ZTYyMDNlZmVmODNjZWE%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=AW451jyDioqxesXvDVp%7EgfYV3uhgFTDwTn3SlZa-gk-yCDb7c-QR44rTm9sWCGSJjaa%7EvJvj9zLGUK7fvbr%7E%7EGQJgL2L%7Es9vkVPg8qs1k%7EtCh-MX%7E45bxo4CapTIo8fx4xLJ738Tks8uzpx3Sy9hWbfuGQFCUwBHzJXG5uGNRzPv87Zdfy4gIIAt0NytaC3bFmKZl4DbXLF4%7EtVWXED7H3NAlBvGETdhjzK5Qr0FLZB2vqC1LQpPTexdTH-ETkPEIQpXRBV-JctzaKBfI1Da-tGpt4JdPlhyPIu1kaNtX13yTibuBrT-mDOy6OVJZ9Zsj%7EHdVUtDrdp-I01dhylHpQ__&Key-Pair-Id=KVTP0A1DKRTAX\r\n", - "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... 3.162.112.69, 3.162.112.2, 3.162.112.100, ...\r\n", - "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|3.162.112.69|:443... connected.\r\n", - "HTTP request sent, awaiting response... " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "200 OK\r\n", - "Length: 1066536657 (1017M) [binary/octet-stream]\r\n", - "Saving to: ‘v5-L6-D2048-E0_01-split-2a.pth’\r\n", - "\r\n", - "\r", - " v5-L6-D20 0%[ ] 0 --.-KB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D204 1%[ ] 15.26M 42.9MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048 3%[ ] 30.52M 47.7MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048- 4%[ ] 45.26M 51.5MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E 5%[> ] 59.20M 52.1MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0 6%[> ] 65.20M 48.8MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_ 7%[> ] 76.29M 44.4MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_0 8%[> ] 91.03M 47.2MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_01 9%[> ] 91.55M 43.0MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_01- 10%[=> ] 106.81M 43.2MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "v5-L6-D2048-E0_01-s 11%[=> ] 120.25M 43.9MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "5-L6-D2048-E0_01-sp 12%[=> ] 122.07M 41.4MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-L6-D2048-E0_01-spl 13%[=> ] 136.81M 42.5MB/s eta 21s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "L6-D2048-E0_01-spli 14%[=> ] 152.07M 42.8MB/s eta 21s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "6-D2048-E0_01-split 15%[==> ] 152.72M 40.5MB/s eta 21s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-D2048-E0_01-split- 16%[==> ] 167.85M 41.5MB/s eta 21s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "D2048-E0_01-split-2 18%[==> ] 183.10M 43.0MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "2048-E0_01-split-2a 19%[==> ] 198.36M 43.4MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "048-E0_01-split-2a. 20%[===> ] 213.11M 44.1MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "48-E0_01-split-2a.p 22%[===> ] 228.36M 43.3MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "8-E0_01-split-2a.pt 22%[===> ] 228.87M 41.1MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-E0_01-split-2a.pth 24%[===> ] 244.13M 41.0MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "E0_01-split-2a.pth 25%[====> ] 259.40M 42.4MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "0_01-split-2a.pth 26%[====> ] 272.83M 40.4MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "_01-split-2a.pth 28%[====> ] 289.40M 41.4MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "01-split-2a.pth 28%[====> ] 289.92M 37.9MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1-split-2a.pth 29%[====> ] 304.66M 36.1MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-split-2a.pth 30%[=====> ] 305.18M 33.4MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "split-2a.pth 31%[=====> ] 318.60M 33.2MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "plit-2a.pth 31%[=====> ] 320.29M 33.3MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "lit-2a.pth 31%[=====> ] 320.57M 30.7MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "it-2a.pth 32%[=====> ] 335.18M 30.1MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "t-2a.pth 33%[=====> ] 345.53M 31.2MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-2a.pth 34%[=====> ] 350.82M 29.7MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "2a.pth 35%[======> ] 360.98M 31.3MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "a.pth 36%[======> ] 366.20M 29.6MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - ".pth 37%[======> ] 380.96M 30.8MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "pth 38%[======> ] 392.79M 32.0MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "th 39%[======> ] 396.73M 29.1MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "h 40%[=======> ] 411.99M 29.1MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " 41%[=======> ] 426.73M 28.7MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v 42%[=======> ] 427.25M 29.1MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5 42%[=======> ] 435.25M 27.9MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5- 43%[=======> ] 438.04M 28.2MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L 43%[=======> ] 442.05M 29.7MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6 43%[=======> ] 446.00M 31.1MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6- 44%[=======> ] 457.24M 33.9MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D 45%[========> ] 457.89M 31.6MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2 46%[========> ] 473.02M 34.8MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D20 48%[========> ] 488.28M 34.1MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D204 49%[========> ] 503.03M 34.6MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048 50%[=========> ] 518.29M 37.3MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048- 51%[=========> ] 525.10M 35.8MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E 52%[=========> ] 534.05M 34.4MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0 53%[=========> ] 548.80M 34.4MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_ 55%[==========> ] 562.75M 33.8MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_0 56%[==========> ] 579.31M 36.0MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_01 57%[==========> ] 581.49M 36.7MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_01- 58%[==========> ] 592.93M 37.4MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "v5-L6-D2048-E0_01-s 58%[==========> ] 595.09M 37.1MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "5-L6-D2048-E0_01-sp 60%[===========> ] 610.35M 38.5MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-L6-D2048-E0_01-spl 61%[===========> ] 625.61M 38.7MB/s eta 11s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "L6-D2048-E0_01-spli 62%[===========> ] 640.36M 39.9MB/s eta 11s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "6-D2048-E0_01-split 64%[===========> ] 653.30M 39.5MB/s eta 11s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-D2048-E0_01-split- 64%[===========> ] 656.13M 38.5MB/s eta 11s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "D2048-E0_01-split-2 66%[============> ] 671.38M 38.9MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "2048-E0_01-split-2a 67%[============> ] 685.57M 39.7MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "048-E0_01-split-2a. 67%[============> ] 686.64M 37.5MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "48-E0_01-split-2a.p 68%[============> ] 701.39M 37.9MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "8-E0_01-split-2a.pt 69%[============> ] 708.59M 38.8MB/s eta 8s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-E0_01-split-2a.pth 70%[=============> ] 715.34M 38.2MB/s eta 8s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "E0_01-split-2a.pth 71%[=============> ] 731.91M 40.7MB/s eta 8s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "0_01-split-2a.pth 73%[=============> ] 747.17M 38.0MB/s eta 8s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "_01-split-2a.pth 73%[=============> ] 747.75M 38.0MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "01-split-2a.pth 74%[=============> ] 762.42M 40.2MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1-split-2a.pth 75%[==============> ] 762.94M 37.2MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-split-2a.pth 76%[==============> ] 776.37M 36.7MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "split-2a.pth 76%[==============> ] 778.20M 34.9MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "plit-2a.pth 77%[==============> ] 791.63M 38.1MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "lit-2a.pth 78%[==============> ] 793.46M 36.0MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "it-2a.pth 79%[==============> ] 808.20M 38.6MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "t-2a.pth 80%[===============> ] 816.07M 36.7MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-2a.pth 81%[===============> ] 823.97M 34.7MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "2a.pth 82%[===============> ] 837.41M 36.4MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "a.pth 83%[===============> ] 853.98M 38.3MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - ".pth 85%[================> ] 867.67M 38.4MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "pth 85%[================> ] 873.17M 39.1MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "th 87%[================> ] 885.01M 36.1MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "h 88%[================> ] 899.75M 37.6MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " 88%[================> ] 900.40M 34.6MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v 90%[=================> ] 915.53M 35.4MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5 91%[=================> ] 930.78M 37.6MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5- 92%[=================> ] 945.53M 40.9MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L 93%[=================> ] 946.04M 37.6MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6 94%[=================> ] 959.48M 38.0MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6- 94%[=================> ] 961.30M 33.1MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D 95%[==================> ] 976.05M 34.9MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2 97%[==================> ] 991.31M 34.8MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D20 97%[==================> ] 992.94M 35.0MB/s eta 2s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D204 98%[==================> ] 1005M 34.7MB/s eta 0s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048 99%[==================> ] 1016M 33.7MB/s eta 0s \r", - "v5-L6-D2048-E0_01-s 100%[===================>] 1017M 33.9MB/s in 28s \r\n", - "\r\n", - "2023-10-11 08:02:52 (36.4 MB/s) - ‘v5-L6-D2048-E0_01-split-2a.pth’ saved [1066536657/1066536657]\r\n", - "\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2023-10-11 08:02:53-- https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/0600b94a58219f658326b4792ef5cd020e9d1a43/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2b.pth\r\n", - "Resolving huggingface.co (huggingface.co)... 18.154.227.67, 18.154.227.69, 18.154.227.7, ...\r\n", - "Connecting to huggingface.co (huggingface.co)|18.154.227.67|:443... connected.\r\n", - "HTTP request sent, awaiting response... 302 Found\r\n", - "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/6b64a1018631b9ddd15a746002bab3eafe956dced78a91af7abcdadaae4a7b25?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L6-D2048-E0_01-split-2b.pth%3B+filename%3D%22v5-L6-D2048-E0_01-split-2b.pth%22%3B&Expires=1697270573&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NzI3MDU3M319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzZiNjRhMTAxODYzMWI5ZGRkMTVhNzQ2MDAyYmFiM2VhZmU5NTZkY2VkNzhhOTFhZjdhYmNkYWRhYWU0YTdiMjU%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=WkKE1KjbKeVQp4dWdBuAAbOfx2JJs%7EDJaKbx8gRQSGABLfGDhkq2L8Q9KZ1fg1v%7E74c0Mkrbvop33pAwQDh782jzEiogbDb8HXSO7AtIYQqvI6K-fmb%7EpxQPFrmypJwWhQj9ePRZX2KSL6LcqN1X0GAheI-PQENpVH3svxhhib2-fYDmuvnpGX7pc6n36GES6lvwOuCQOxfIhlFnIiuNEU00NaBdDiaXb-uteXhSkKO-1EFCM0fBtwT5hVkdHZQG2m6iMcI2KaN0AHV%7EvF838f4DM%7ERbjVkRgwphRaYZxmJxUKZxGTV7rRJjIQA%7EOlnPllE1dSdwJ7y0ULOIKQHYUQ__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n", - "--2023-10-11 08:02:53-- https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/6b64a1018631b9ddd15a746002bab3eafe956dced78a91af7abcdadaae4a7b25?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L6-D2048-E0_01-split-2b.pth%3B+filename%3D%22v5-L6-D2048-E0_01-split-2b.pth%22%3B&Expires=1697270573&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NzI3MDU3M319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzZiNjRhMTAxODYzMWI5ZGRkMTVhNzQ2MDAyYmFiM2VhZmU5NTZkY2VkNzhhOTFhZjdhYmNkYWRhYWU0YTdiMjU%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=WkKE1KjbKeVQp4dWdBuAAbOfx2JJs%7EDJaKbx8gRQSGABLfGDhkq2L8Q9KZ1fg1v%7E74c0Mkrbvop33pAwQDh782jzEiogbDb8HXSO7AtIYQqvI6K-fmb%7EpxQPFrmypJwWhQj9ePRZX2KSL6LcqN1X0GAheI-PQENpVH3svxhhib2-fYDmuvnpGX7pc6n36GES6lvwOuCQOxfIhlFnIiuNEU00NaBdDiaXb-uteXhSkKO-1EFCM0fBtwT5hVkdHZQG2m6iMcI2KaN0AHV%7EvF838f4DM%7ERbjVkRgwphRaYZxmJxUKZxGTV7rRJjIQA%7EOlnPllE1dSdwJ7y0ULOIKQHYUQ__&Key-Pair-Id=KVTP0A1DKRTAX\r\n", - "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... 3.162.112.95, 3.162.112.100, 3.162.112.2, ...\r\n", - "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|3.162.112.95|:443... connected.\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "HTTP request sent, awaiting response... " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "200 OK\r\n", - "Length: 1066536657 (1017M) [binary/octet-stream]\r\n", - "Saving to: ‘v5-L6-D2048-E0_01-split-2b.pth’\r\n", - "\r\n", - "\r", - " v5-L6-D20 0%[ ] 0 --.-KB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D204 1%[ ] 14.74M 67.8MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048 2%[ ] 28.69M 63.1MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048- 3%[ ] 30.52M 42.3MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E 4%[ ] 45.26M 45.2MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0 4%[ ] 45.78M 37.9MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_ 6%[> ] 61.03M 41.2MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_0 7%[> ] 75.78M 45.0MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_01 8%[> ] 85.94M 45.6MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_01- 9%[> ] 91.55M 40.8MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "v5-L6-D2048-E0_01-s 10%[=> ] 106.81M 40.5MB/s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "5-L6-D2048-E0_01-sp 12%[=> ] 122.07M 40.2MB/s eta 22s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-L6-D2048-E0_01-spl 13%[=> ] 137.33M 41.7MB/s eta 22s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "L6-D2048-E0_01-spli 14%[=> ] 152.07M 42.9MB/s eta 22s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "6-D2048-E0_01-split 16%[==> ] 167.33M 43.6MB/s eta 22s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-D2048-E0_01-split- 17%[==> ] 181.32M 44.9MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "D2048-E0_01-split-2 18%[==> ] 183.10M 41.7MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "2048-E0_01-split-2b 19%[==> ] 196.53M 41.8MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "048-E0_01-split-2b. 19%[==> ] 198.36M 39.0MB/s eta 19s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "48-E0_01-split-2b.p 20%[===> ] 213.11M 39.0MB/s eta 20s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "8-E0_01-split-2b.pt 21%[===> ] 220.29M 40.8MB/s eta 20s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-E0_01-split-2b.pth 22%[===> ] 228.36M 39.9MB/s eta 20s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "E0_01-split-2b.pth 24%[===> ] 244.13M 40.3MB/s eta 20s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "0_01-split-2b.pth 25%[====> ] 259.40M 40.4MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "_01-split-2b.pth 26%[====> ] 274.14M 42.1MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "01-split-2b.pth 27%[====> ] 274.66M 38.5MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1-split-2b.pth 28%[====> ] 289.92M 41.6MB/s eta 18s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-split-2b.pth 30%[=====> ] 305.18M 41.6MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "split-2b.pth 31%[=====> ] 320.43M 40.9MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "plit-2b.pth 32%[=====> ] 335.18M 41.2MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "lit-2b.pth 33%[=====> ] 335.69M 38.5MB/s eta 17s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "it-2b.pth 34%[=====> ] 350.95M 38.6MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "t-2b.pth 35%[======> ] 365.70M 40.9MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-2b.pth 36%[======> ] 366.20M 38.0MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "2b.pth 37%[======> ] 381.47M 38.8MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "b.pth 37%[======> ] 385.65M 39.1MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - ".pth 39%[======> ] 396.73M 36.3MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "pth 39%[======> ] 406.75M 37.5MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "th 40%[=======> ] 411.99M 33.0MB/s eta 16s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "h 42%[=======> ] 427.25M 33.6MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " 43%[=======> ] 441.98M 32.7MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v 43%[=======> ] 442.51M 32.5MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5 44%[=======> ] 457.25M 32.2MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5- 45%[========> ] 457.76M 32.2MB/s eta 15s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L 46%[========> ] 472.50M 31.5MB/s eta 14s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6 46%[========> ] 473.02M 31.5MB/s eta 14s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6- 48%[========> ] 488.28M 30.9MB/s eta 14s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D 49%[========> ] 503.54M 33.6MB/s eta 14s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2 50%[=========> ] 518.29M 34.2MB/s eta 14s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D20 51%[=========> ] 518.80M 34.8MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D204 52%[=========> ] 534.05M 34.2MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048 54%[=========> ] 549.31M 37.5MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048- 55%[==========> ] 564.06M 37.7MB/s eta 13s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E 55%[==========> ] 565.78M 37.7MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0 57%[==========> ] 579.83M 37.8MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_ 58%[==========> ] 595.09M 39.7MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_0 60%[===========> ] 610.35M 40.9MB/s eta 12s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_01 61%[===========> ] 625.47M 44.1MB/s eta 10s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6-D2048-E0_01- 61%[===========> ] 629.82M 42.6MB/s eta 10s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "v5-L6-D2048-E0_01-s 63%[===========> ] 640.87M 42.6MB/s eta 10s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "5-L6-D2048-E0_01-sp 64%[===========> ] 656.13M 45.6MB/s eta 10s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-L6-D2048-E0_01-spl 66%[============> ] 671.38M 45.8MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "L6-D2048-E0_01-spli 67%[============> ] 686.64M 47.1MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "6-D2048-E0_01-split 69%[============> ] 701.90M 47.0MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-D2048-E0_01-split- 70%[=============> ] 717.16M 46.9MB/s eta 9s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "D2048-E0_01-split-2 71%[=============> ] 730.60M 47.8MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "2048-E0_01-split-2b 73%[=============> ] 747.17M 45.9MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "048-E0_01-split-2b. 74%[=============> ] 755.98M 45.7MB/s eta 7s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "48-E0_01-split-2b.p 75%[==============> ] 762.94M 43.2MB/s eta 6s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "8-E0_01-split-2b.pt 76%[==============> ] 777.68M 45.4MB/s eta 6s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-E0_01-split-2b.pth 76%[==============> ] 778.32M 42.7MB/s eta 6s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "E0_01-split-2b.pth 78%[==============> ] 793.46M 42.0MB/s eta 6s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "0_01-split-2b.pth 79%[==============> ] 808.20M 41.7MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "_01-split-2b.pth 80%[===============> ] 814.09M 42.3MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "01-split-2b.pth 80%[===============> ] 823.46M 41.0MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "1-split-2b.pth 81%[===============> ] 823.97M 40.5MB/s eta 5s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-split-2b.pth 82%[===============> ] 838.71M 38.6MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "split-2b.pth 83%[===============> ] 853.98M 41.4MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "plit-2b.pth 84%[===============> ] 854.61M 38.0MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "lit-2b.pth 85%[================> ] 869.24M 35.7MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "it-2b.pth 85%[================> ] 869.75M 35.3MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "t-2b.pth 86%[================> ] 875.74M 34.3MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-2b.pth 87%[================> ] 885.01M 32.5MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "2b.pth 88%[================> ] 900.27M 33.8MB/s eta 4s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "b.pth 89%[================> ] 913.70M 34.5MB/s eta 3s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - ".pth 90%[=================> ] 924.21M 34.8MB/s eta 3s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "pth 91%[=================> ] 930.27M 35.3MB/s eta 3s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "th 93%[=================> ] 946.04M 34.9MB/s eta 3s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "h 94%[=================> ] 961.30M 37.2MB/s eta 1s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " 95%[==================> ] 970.14M 35.7MB/s eta 1s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v 96%[==================> ] 976.55M 34.9MB/s eta 1s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5 97%[==================> ] 991.82M 37.0MB/s eta 1s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5- 98%[==================> ] 998.13M 35.6MB/s eta 1s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L 98%[==================> ] 1007M 37.2MB/s eta 0s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - " v5-L6 99%[==================> ] 1016M 34.9MB/s eta 0s \r", - "v5-L6-D2048-E0_01-s 100%[===================>] 1017M 35.1MB/s in 26s \r\n", - "\r\n", - "2023-10-11 08:03:19 (38.9 MB/s) - ‘v5-L6-D2048-E0_01-split-2b.pth’ saved [1066536657/1066536657]\r\n", - "\r\n" - ] - } - ], - "source": [ - "# Get the init split model, and finetune from there\n", - "!cd \"{PROJECT_DIR}/model/\" && wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/0600b94a58219f658326b4792ef5cd020e9d1a43/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2a.pth\"\n", - "!cd \"{PROJECT_DIR}/model/\" && wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/0600b94a58219f658326b4792ef5cd020e9d1a43/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2b.pth\"" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "2a3cd2d1", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:03:19.666619Z", - "iopub.status.busy": "2023-10-11T08:03:19.665958Z", - "iopub.status.idle": "2023-10-11T08:03:29.305787Z", - "shell.execute_reply": "2023-10-11T08:03:29.304873Z" - }, - "papermill": { - "duration": 9.658186, - "end_time": "2023-10-11T08:03:29.308744", - "exception": false, - "start_time": "2023-10-11T08:03:19.650558", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "Saving the dataset (0/2 shards): 0%| | 0/27200 [00:00\r\n", - " cli_main()\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 253, in cli_main\r\n", - " LightningCLI(\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 350, in __init__\r\n", - " self.instantiate_classes()\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 499, in instantiate_classes\r\n", - " self.config_init = self.parser.instantiate_classes(self.config)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n", - " cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1130, in instantiate_classes\r\n", - " cfg[subcommand] = subparser.instantiate_classes(cfg[subcommand], instantiate_groups=instantiate_groups)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n", - " cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1124, in instantiate_classes\r\n", - " component.instantiate_class(component, cfg)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_signatures.py\", line 561, in group_instantiate_class\r\n", - " parent[key] = group.group_class(**value)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 566, in __init__\r\n", - " raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n", - "ValueError: load_model file '../model/v5-L6+6-D2048-E0_01-split-2a.pth' does not exist\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33m[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion A3 (train-ctx=4k, deepspeed_stage_2_offload)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/runs/5696uouo\u001b[0m\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjk0OTk4MDcy/version_details/v16\u001b[0m\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20231011_080337-5696uouo/logs\u001b[0m\r\n" - ] - } - ], - "source": [ - "# Start the foundation model training\n", - "!cd \"{TRAINER_DIR}\" && \\\n", - " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", - " python3 lightning_trainer.py fit \\\n", - " -c \"{NOTEBOOK_DIR}/enwiki-4k-part3.yaml\" \\\n", - " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - layer-expansion A3 (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n", - " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", - " --trainer.devices=\"{GPU_DEVICES}\" \\\n", - " --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-layer-expansion-a3/\" \\\n", - " --model.load_model=\"../model/{FILENAME_PREFIX}-split-2a.pth\" \\\n", - " --model.ctx_len=4096 \\\n", - " --model.bptt_learning_range=1" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "53867c42", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:03:46.969471Z", - "iopub.status.busy": "2023-10-11T08:03:46.969019Z", - "iopub.status.idle": "2023-10-11T08:03:50.682437Z", - "shell.execute_reply": "2023-10-11T08:03:50.680986Z" - }, - "papermill": { - "duration": 3.732808, - "end_time": "2023-10-11T08:03:50.685581", - "exception": false, - "start_time": "2023-10-11T08:03:46.952773", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2023-10-11 08:03:49,278] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Traceback (most recent call last):\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in \r\n", - " convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n", - " state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n", - " raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n", - "ValueError: Unable to find 'latest' file at ../checkpoint/v5-L6+6-D2048-E0_01-layer-expansion-a3/last.ckpt/latest\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ls: cannot access '../model/v5-L6+6-D2048-E0_01-layer-expansion-a3.pth': No such file or directory\r\n" - ] - } - ], - "source": [ - "# Lets export the model from the checkpoint\n", - "!cd \"{TRAINER_DIR}\" && \\\n", - " python3 export_checkpoint.py \"../checkpoint/{FILENAME_PREFIX}-layer-expansion-a3/last.ckpt\" \"../model/{FILENAME_PREFIX}-layer-expansion-a3.pth\" \"bf16\"\n", - "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-layer-expansion-a3.pth\"" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "5688e577", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:03:50.806267Z", - "iopub.status.busy": "2023-10-11T08:03:50.804997Z", - "iopub.status.idle": "2023-10-11T08:03:56.788036Z", - "shell.execute_reply": "2023-10-11T08:03:56.786568Z" - }, - "papermill": { - "duration": 6.08675, - "end_time": "2023-10-11T08:03:56.790510", - "exception": false, - "start_time": "2023-10-11T08:03:50.703760", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2023-10-11 08:03:54,934] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Traceback (most recent call last):\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/dragon_test.py\", line 52, in \r\n", - " model = SimpleRWKV(MODEL_PATH, device=DEVICE)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 1420, in __init__\r\n", - " self.model = RWKV(**model_config)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 566, in __init__\r\n", - " raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n", - "ValueError: load_model file '../model/v5-L6+6-D2048-E0_01-layer-expansion-a3.pth' does not exist\r\n" - ] - } - ], - "source": [ - "# # Lets do a quick dragon prompt validation\n", - "!cd \"{INFERENCE_DIR}\" && \\\n", - " python3 dragon_test.py \"../model/{FILENAME_PREFIX}-layer-expansion-a3.pth\" \"cuda fp32\"" - ] - }, - { - "cell_type": "markdown", - "id": "b4927e87", - "metadata": { - "papermill": { - "duration": 0.015295, - "end_time": "2023-10-11T08:03:56.820640", - "exception": false, - "start_time": "2023-10-11T08:03:56.805345", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Enwiki Stage 3 : Split-Baseline-B training" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "6bdd285a", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:03:56.853495Z", - "iopub.status.busy": "2023-10-11T08:03:56.852946Z", - "iopub.status.idle": "2023-10-11T08:04:11.500794Z", - "shell.execute_reply": "2023-10-11T08:04:11.499336Z" - }, - "papermill": { - "duration": 14.668001, - "end_time": "2023-10-11T08:04:11.503644", - "exception": false, - "start_time": "2023-10-11T08:03:56.835643", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2023-10-11 08:04:01,096] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/multi-size-train/enwiki-4k-part3.yaml', '--trainer.logger.init_args.name=[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion B3 (train-ctx=4k, deepspeed_stage_2_offload)', '--trainer.strategy=deepspeed_stage_2_offload', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-L6+6-D2048-E0_01-layer-expansion-b3/', '--model.load_model=../model/v5-L6+6-D2048-E0_01-split-2b.pth', '--model.ctx_len=4096', '--model.bptt_learning_range=1'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/multi-size-train/enwiki-4k-part3.yaml', '--trainer.logger.init_args.name=[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion B3 (train-ctx=4k, deepspeed_stage_2_offload)', '--trainer.strategy=deepspeed_stage_2_offload', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-L6+6-D2048-E0_01-layer-expansion-b3/', '--model.load_model=../model/v5-L6+6-D2048-E0_01-split-2b.pth', '--model.ctx_len=4096', '--model.bptt_learning_range=1'].\r\n", - " rank_zero_warn(\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 1732922148\r\n", - " rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n", - "Global seed set to 1732922148\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.12\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20231011_080403-88lcuk7j\u001b[0m\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion B3 (train-ctx=4k, deepspeed_stage_2_offload)\u001b[0m\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments\u001b[0m\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/runs/88lcuk7j\u001b[0m\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Traceback (most recent call last):\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 278, in \r\n", - " cli_main()\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 253, in cli_main\r\n", - " LightningCLI(\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 350, in __init__\r\n", - " self.instantiate_classes()\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 499, in instantiate_classes\r\n", - " self.config_init = self.parser.instantiate_classes(self.config)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n", - " cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1130, in instantiate_classes\r\n", - " cfg[subcommand] = subparser.instantiate_classes(cfg[subcommand], instantiate_groups=instantiate_groups)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n", - " cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1124, in instantiate_classes\r\n", - " component.instantiate_class(component, cfg)\r\n", - " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_signatures.py\", line 561, in group_instantiate_class\r\n", - " parent[key] = group.group_class(**value)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 566, in __init__\r\n", - " raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n", - "ValueError: load_model file '../model/v5-L6+6-D2048-E0_01-split-2b.pth' does not exist\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33m[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion B3 (train-ctx=4k, deepspeed_stage_2_offload)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/runs/88lcuk7j\u001b[0m\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjk0OTk4MDcy/version_details/v16\u001b[0m\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20231011_080403-88lcuk7j/logs\u001b[0m\r\n" - ] - } - ], - "source": [ - "# Start the foundation model training\n", - "!cd \"{TRAINER_DIR}\" && \\\n", - " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", - " python3 lightning_trainer.py fit \\\n", - " -c \"{NOTEBOOK_DIR}/enwiki-4k-part3.yaml\" \\\n", - " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - layer-expansion B3 (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n", - " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", - " --trainer.devices=\"{GPU_DEVICES}\" \\\n", - " --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-layer-expansion-b3/\" \\\n", - " --model.load_model=\"../model/{FILENAME_PREFIX}-split-2b.pth\" \\\n", - " --model.ctx_len=4096 \\\n", - " --model.bptt_learning_range=1" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "ae4623a1", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:04:11.546046Z", - "iopub.status.busy": "2023-10-11T08:04:11.544870Z", - "iopub.status.idle": "2023-10-11T08:04:15.274349Z", - "shell.execute_reply": "2023-10-11T08:04:15.272957Z" - }, - "papermill": { - "duration": 3.754115, - "end_time": "2023-10-11T08:04:15.277163", - "exception": false, - "start_time": "2023-10-11T08:04:11.523048", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2023-10-11 08:04:13,869] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Traceback (most recent call last):\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in \r\n", - " convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n", - " state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n", - " raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n", - "ValueError: Unable to find 'latest' file at ../checkpoint/v5-L6+6-D2048-E0_01-layer-expansion-b3/last.ckpt/latest\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ls: cannot access '../model/v5-L6+6-D2048-E0_01-layer-expansion-b3.pth': No such file or directory\r\n" - ] - } - ], - "source": [ - "# Lets export the model from the checkpoint\n", - "!cd \"{TRAINER_DIR}\" && \\\n", - " python3 export_checkpoint.py \"../checkpoint/{FILENAME_PREFIX}-layer-expansion-b3/last.ckpt\" \"../model/{FILENAME_PREFIX}-layer-expansion-b3.pth\" \"bf16\"\n", - "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-layer-expansion-b3.pth\"" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "8e1b1152", - "metadata": { - "execution": { - "iopub.execute_input": "2023-10-11T08:04:15.319747Z", - "iopub.status.busy": "2023-10-11T08:04:15.318636Z", - "iopub.status.idle": "2023-10-11T08:04:21.268526Z", - "shell.execute_reply": "2023-10-11T08:04:21.267073Z" - }, - "papermill": { - "duration": 5.974644, - "end_time": "2023-10-11T08:04:21.271495", - "exception": false, - "start_time": "2023-10-11T08:04:15.296851", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2023-10-11 08:04:19,430] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n", - "Traceback (most recent call last):\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/dragon_test.py\", line 52, in \r\n", - " model = SimpleRWKV(MODEL_PATH, device=DEVICE)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 1420, in __init__\r\n", - " self.model = RWKV(**model_config)\r\n", - " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 566, in __init__\r\n", - " raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n", - "ValueError: load_model file '../model/v5-L6+6-D2048-E0_01-layer-expansion-b3.pth' does not exist\r\n" - ] - } - ], - "source": [ - "# # Lets do a quick dragon prompt validation\n", - "!cd \"{INFERENCE_DIR}\" && \\\n", - " python3 dragon_test.py \"../model/{FILENAME_PREFIX}-layer-expansion-b3.pth\" \"cuda fp32\"" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - }, - "papermill": { - "default_parameters": {}, - "duration": 119.315066, - "end_time": "2023-10-11T08:04:21.714050", - "environment_variables": {}, - "exception": null, - "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb", - "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb", - "parameters": {}, - "start_time": "2023-10-11T08:02:22.398984", - "version": "2.4.0" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} \ No newline at end of file diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-expansion.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-expansion.ipynb deleted file mode 100644 index 31a5b8eeab80f4fb0b5a736155d2fd141fa7fd54..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-expansion.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b0aa2c37ab25e53ed3e45a9e7b5b09d1ac2d2f627412df5c98cc1f113838d800 -size 15734950 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-overwrite-naive.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-overwrite-naive.ipynb deleted file mode 100644 index 9810fb95056168b6f333635a6ad59587d31b6e23..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-overwrite-naive.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d643e2a64a0f7323eb7b14b90ce5a0e5457818349c75e666dbf52b7319f5de72 -size 15733849 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-baseline-p2.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-baseline-p2.pth deleted file mode 100644 index b42c1d46426286791c4b684a05f90055dccae4d1..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-baseline-p2.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:849b57b4d493d40313ef04b30ffc22ec6f5cb99e05225615ee0cb00acb78a95d -size 1066537077 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-baseline-p3.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-baseline-p3.pth deleted file mode 100644 index e671afa6d1c25ea33703bbbdf389a33493910501..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-baseline-p3.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8413565273ef40f61db246dcbf793e045b39d1163e18885441be5a16d733f34c -size 1066537077 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-enwiki-4k-p1.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-enwiki-4k-p1.pth deleted file mode 100644 index b5857b83e411d72861863eda5c9c32a7132e1bfe..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-enwiki-4k-p1.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:235d88b0aa939596392f2b5734a426940535816aa13106498974a809051a4c75 -size 1066537217 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-layer-expansion-a3.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-layer-expansion-a3.pth deleted file mode 100644 index e145614e20e99af77e84454e6ef16a39a61c1d9f..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-layer-expansion-a3.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1afd8d92632792f498805ac222d159524badf4ecbcaaae597060b6bb87a53110 -size 1066538057 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-layer-expansion-b3.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-layer-expansion-b3.pth deleted file mode 100644 index 381a48603dc68a10750a4b7d78e79594e6bde52d..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-layer-expansion-b3.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9e61d8f8901d1eb50759f0242e2886678ed24b9931295a270b14120ba74cb5c3 -size 1066538057 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-neox-v5base-init.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-neox-v5base-init.pth deleted file mode 100644 index ded0f392eb463040cbb0e4a66326c5ae08bcbda6..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-neox-v5base-init.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c2d60ede71bc384ee4eff0a591b3fa57dd670c27e5e8ce5eadf25a7f0d7e226d -size 1066538337 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2a.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2a.pth deleted file mode 100644 index c498833cf2e305eacbd6ebd9485e9a5d6706eca2..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2a.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2f52085cee9c3db4bb079dc44edf50b0a19c170bd92128e918e6203efef83cea -size 1066536657 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2b.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2b.pth deleted file mode 100644 index b1bfb4e806da5dde645c9feb2acb0b0140ce43c6..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2b.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b64a1018631b9ddd15a746002bab3eafe956dced78a91af7abcdadaae4a7b25 -size 1066536657 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-merge-2m.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-merge-2m.pth deleted file mode 100644 index bfe873e0bdd09173577c50c9f6f3634155ade0ce..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-merge-2m.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f10f8f00c42b6408db81a3b26d53411c41edc7f23f5097ac095ad3096d6c5dc1 -size 1066537497 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-merge-p3.pth b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-merge-p3.pth deleted file mode 100644 index f2aa96bd9b7f4e604e397947323f5156ee2fa129..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-merge-p3.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6f07a8414cd0cd1c3df705dff8a0f2142231171ee52a94d12c55dfe7c888fef7 -size 1066537497 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-baseline.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-baseline.ipynb deleted file mode 100644 index e04ec817954792ce45a871de0ebed229db957ffd..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-baseline.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0fccffc430231ad06fdb02a7e50ea57acfbeae3c42a97b018f62f937d30736e4 -size 16519239 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb deleted file mode 100644 index 955412d6f333912148d0dc1023c32ce58509ccd2..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-part1.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7260b3fe80de461d6dc923b21af87361f71e26a4a7191d51dd9665403728ddfa -size 15732960 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-baseline.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-baseline.ipynb deleted file mode 100644 index 5c78f23bf3d5e33fcb6836c803015836a2da0149..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-baseline.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f95adf89d498a4dd58af22ba192b2fd4d08ceec250784c7e9f6f9b8de0fed2bc -size 15855123 diff --git a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-train.ipynb b/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-train.ipynb deleted file mode 100644 index 50ba5f3c8e80bcfb1a8005406d9e4f78979d8dac..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-split-train.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c308e5ae9f8fde5fd24cafccf60917dca9c97fc2e0a5fbcfa01027d6d50e927d -size 16623766 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb index 7e206cc12ec5ccf27fa871d361750dce04655d13..97b063f5044277e0354a1f4cc4aa6140155c40f8 100644 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb +++ b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0439ce7fb6866af36cb53bbddaf6a1ed49656c85a84d6a2aabd6754b30fa2109 -size 61159745 +oid sha256:b01a0b009aa135d08ecf6442863da28f6a1854d6c1c4158ff255380ad9452944 +size 40077989 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage4.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage4.ipynb deleted file mode 100644 index 7ffd7172b586e0b8857d504b71f3808861d311aa..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage4.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3855c5ed19072f2ecaec4294c3945a3290e692d15c0aa7351c7d4917404fbf65 -size 38208798 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage5.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage5.ipynb deleted file mode 100644 index 2b40078b2fa26fff7d07cf99fa6ea18a8e9a50b1..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage5.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e318ecc4d20d89232f8a0677c54f8489d602cb3d66632cadb29c028c917eb00e -size 30322339 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-1k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-1k.pth deleted file mode 100644 index 8af2cc985147220953dab23cbe6635297e1208ab..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-1k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cf3a15d56db013d138bed6780d58c4362ca96b3ef98fb98e2d1444f325c582b5 -size 1066537077 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-2k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-2k.pth deleted file mode 100644 index ec338ad0a412b427fe4cc7115959eb96face9f73..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-2k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:76cc2d79013781f18c6507848a138150084d37c3aae0f20145e7e5854bcabb99 -size 1066537077 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-4k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-4k.pth deleted file mode 100644 index ae6d693b3aafaef43037882c09d0bebc48f35c22..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-4k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2f1b8811c3f2f8c12c564edb973392bb486c9d674152f3eb769ac206c20bcfc0 -size 1066537077 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-8k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-8k.pth index 18bf2327f7771b99c67b0d997cbdf1ba8f34902d..1e709d0239b8d79d4ad6660f03afff518fe2873c 100644 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-8k.pth +++ b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-8k.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:db7ab09a447f96d9fcdbee4761bd35f2c3bce9868d3136959ed601a8e478083c +oid sha256:bd972e83d03293bd1f58dc83f2a4639c0adb73f764999823c71e19bb5ac043d2 size 1066537077 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb index 67972444f9ed3d06fa1d595845dd1bbdebb54e5f..eeb9c9a90ac293c431a83554ec5153c190608b79 100644 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb +++ b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:46167bb1edba84bd184533d3d424a8c4d9ed5a15d8b93474596453f235462f13 -size 58866814 +oid sha256:3d9d8b56312496fbc5e82456eec32170f9931c911f74acdd5b3d16eb9602df1c +size 38290907 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage4.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage4.ipynb deleted file mode 100644 index d2df3a3f63985ebd40d91463248c0db8fe7ba56b..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage4.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:17f586e9a94c3fbf463e84c518ca7712b6fd539d3c8dda8e4893115a1298c8d4 -size 37212168 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage5.ipynb b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage5.ipynb deleted file mode 100644 index 4c2298987669d56cd07135b0ec4f9da50c75303c..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage5.ipynb +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8b5c4dec751996e61882229f30bba0e005ce01e44319a1b75011aacad7575fc0 -size 30004883 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-1k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-1k.pth deleted file mode 100644 index b525c72fd31827ffe8aadfecb883cf5b3cceb8f6..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-1k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0d91f2c3f5b96e9d249342bdead58f58d3b1f5ab7c92401a50ab4e5170ae2636 -size 1537632373 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-2k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-2k.pth deleted file mode 100644 index 4aade76d3bbd951a784adce9596a631a5c1640e8..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-2k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a00e8cbc2222bb853dc5f83fe3d6f4c43f4b970cc554be37fb937d476e3eaf88 -size 1537632373 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-4k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-4k.pth deleted file mode 100644 index 4b8c0bd7e1c67c46e2676c3c9141d7c46019114d..0000000000000000000000000000000000000000 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-4k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4cc22ca95e9a2054534ef8fcce63cf2d0ce65916b39318ce650debd41adee876 -size 1537632373 diff --git a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-8k.pth b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-8k.pth index 242c0b4bb345f3db30feafae94d0928d82498501..3f04e0208f21014d7a964cd209e2d3bea780784a 100644 --- a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-8k.pth +++ b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/v5r3-L6-D2560-E0_1-mem-ctx-8k.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:28d84ce479bb5ca4e3a226f9eb03266c344a398bfff8f420af4ae5598f23fe86 +oid sha256:af21df70001ddb20febaba564d1f7093390d65e2b9109abdf94dfee5d70a9c8a size 1537632373 diff --git a/manual-uploads/3B-code/3B-CM-v5r4-L48-D2048-E0_1-enwiki-4k.pth b/manual-uploads/3B-code/3B-CM-v5r4-L48-D2048-E0_1-enwiki-4k.pth deleted file mode 100644 index 1bf1e3dc51ade1b71678c5f59d05067dfad6a652..0000000000000000000000000000000000000000 --- a/manual-uploads/3B-code/3B-CM-v5r4-L48-D2048-E0_1-enwiki-4k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:327bb880f90f80ba9d119f0ca43fb108994e02c5935ec0384a619b092fc2f341 -size 5774098255 diff --git a/manual-uploads/3B-code/_anchor b/manual-uploads/3B-code/_anchor deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-1B5-world.pth b/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-1B5-world.pth deleted file mode 100644 index 85b2cc712939163ad6ffc54bc460941d4b9d38cd..0000000000000000000000000000000000000000 --- a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-1B5-world.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1844acad3a36721d4427efa928dd7bbe84bff6ec98ceb310db33987106672a8d -size 3155687506 diff --git a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-3B-world.pth b/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-3B-world.pth deleted file mode 100644 index fbcd575d03307f44cf24f2855f831273aa31a819..0000000000000000000000000000000000000000 --- a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-3B-world.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3a8289e17931e0d3ed2cc213eaa66e1ce12f005c69030a9afb38b33987f8877b -size 6126236920 diff --git a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-7B-world.pth b/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-7B-world.pth deleted file mode 100644 index ee19bc64b756b3f670b76b0754f8c9bb20960aab..0000000000000000000000000000000000000000 --- a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-1-RWKV-v5-7B-world.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3faf38a7820276bc2dc36d27259d7067c56aa228ec5dd72f743dfc9d72ff3988 -size 15036330880 diff --git a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-1B5-world.pth b/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-1B5-world.pth deleted file mode 100644 index 414398090acb93c53e824c8b21fd9a840dc301e6..0000000000000000000000000000000000000000 --- a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-1B5-world.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0dac80051873f2fc1bb4645d7986330b49976520ddad6574ab4ad4d3dc3bdc15 -size 3155687506 diff --git a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-3B-world.pth b/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-3B-world.pth deleted file mode 100644 index 8e7c0e5c5233a0b493e06a7d261ab72a14e68d71..0000000000000000000000000000000000000000 --- a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-3B-world.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:11238a58929e3cb5c4cfe2777e555c66f01a09e391361e6cc30143eb5360e1ac -size 6126236920 diff --git a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-7B-world.pth b/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-7B-world.pth deleted file mode 100644 index d46afdf695f7e39687ceae0cafb215d175b58936..0000000000000000000000000000000000000000 --- a/manual-uploads/RWKV-v5-memory-test/Memory-Tune-Stage-2-RWKV-v5-7B-world.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53aeea26eac9b77fdc82484533e98374cc06b08ab2084d7e7f062325b86a912b -size 15036330880 diff --git a/manual-uploads/RWKV-v5-memory-test/_anchor.txt b/manual-uploads/RWKV-v5-memory-test/_anchor.txt deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000