Spaces:

clecho52
/

rwkv-training

Running

App Files Files Community

clecho52 committed on May 29, 2023

Commit

7d59799

•

1 Parent(s): a1d0bac

Upload RWKV_v4_RNN_Pile_Fine_Tuning.ipynb

Browse files

Files changed (1) hide show

RWKV_v4_RNN_Pile_Fine_Tuning.ipynb +299 -0

RWKV_v4_RNN_Pile_Fine_Tuning.ipynb ADDED Viewed

	@@ -0,0 +1,299 @@

+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Vx7KFfeieD7z"
+      },
+      "source": [
+        "# RWKV-v4-RNN-Pile Fine-Tuning\n",
+        "\n",
+        "[RWKV](https://github.com/BlinkDL/RWKV-LM) is an RNN with transformer-level performance.\n",
+        "\n",
+        "\n",
+        "This notebook aims to streamline fine-tuning RWKV-v4 models on your own text."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "7JFIiAsrfvJy"
+      },
+      "source": [
+        "\n",
+        "## Setup"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "g_qFjgYmtSfK"
+      },
+      "outputs": [],
+      "source": [
+        "#@title Google Drive Options { display-mode: \"form\" }\n",
+        "save_models_to_drive = True #@param {type:\"boolean\"}\n",
+        "drive_mount = '/content/drive' #@param {type:\"string\"}\n",
+        "output_dir = 'rwkv-v4-rnn-pile-tuning' #@param {type:\"string\"}\n",
+        "tuned_model_name = 'tuned' #@param {type:\"string\"}\n",
+        "\n",
+        "import os\n",
+        "\n",
+        "if save_models_to_drive:\n",
+        "    # Imported only when Drive is requested, so this cell does not need\n",
+        "    # google.colab when models are kept on the local disk.\n",
+        "    from google.colab import drive\n",
+        "    drive.mount(drive_mount)\n",
+        "    output_path = f\"{drive_mount}/MyDrive/{output_dir}\"\n",
+        "else:\n",
+        "    output_path = f\"/content/{output_dir}\"\n",
+        "\n",
+        "# Create the output folders up front; exist_ok makes re-running this cell safe.\n",
+        "os.makedirs(f\"{output_path}/{tuned_model_name}\", exist_ok=True)\n",
+        "os.makedirs(f\"{output_path}/base_models/\", exist_ok=True)\n",
+        "\n",
+        "print(f\"Saving models to {output_path}\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "eivKJ6FP1_9z",
+        "outputId": "a687e3ad-8158-492a-da86-4f4ed8804699",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        }
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Fri Sep  2 16:11:37 2022       \n",
+            "+-----------------------------------------------------------------------------+\n",
+            "| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |\n",
+            "|-------------------------------+----------------------+----------------------+\n",
+            "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
+            "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
+            "|                               |                      |               MIG M. |\n",
+            "|===============================+======================+======================|\n",
+            "|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |\n",
+            "| N/A   35C    P0    28W / 250W |      0MiB / 16280MiB |      0%      Default |\n",
+            "|                               |                      |                  N/A |\n",
+            "+-------------------------------+----------------------+----------------------+\n",
+            "                                                                               \n",
+            "+-----------------------------------------------------------------------------+\n",
+            "| Processes:                                                                  |\n",
+            "|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\n",
+            "|        ID   ID                                                   Usage      |\n",
+            "|=============================================================================|\n",
+            "|  No running processes found                                                 |\n",
+            "+-----------------------------------------------------------------------------+\n"
+          ]
+        }
+      ],
+      "source": [
+        "!nvidia-smi"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "R4lt0FTegJw9"
+      },
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "\n",
+        "repo_root = \"/content/RWKV-LM\"\n",
+        "repo_dir = f\"{repo_root}/RWKV-v4\"\n",
+        "\n",
+        "# Clone to an absolute path, and only once: after the %cd below the working\n",
+        "# directory is inside the repo, so re-running a plain `git clone` would create\n",
+        "# a second, nested copy.\n",
+        "if not os.path.isdir(repo_root):\n",
+        "    !git clone https://github.com/blinkdl/RWKV-LM $repo_root\n",
+        "%cd $repo_dir"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "RDavUrBsgKIV"
+      },
+      "outputs": [],
+      "source": [
+        "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
+        "%pip install transformers pytorch-lightning==1.9 deepspeed wandb ninja"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Wt7y7vR6e6U3"
+      },
+      "source": [
+        "## Load Base Model\n",
+        "\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "KIgagN-Se3wi"
+      },
+      "outputs": [],
+      "source": [
+        "#@title Base Model Options\n",
+        "#@markdown Selecting any of the listed options downloads that checkpoint from Hugging Face.\n",
+        "\n",
+        "base_model_name = \"RWKV-4-Pile-169M\" #@param [\"RWKV-4-Pile-1B5\", \"RWKV-4-Pile-430M\", \"RWKV-4-Pile-169M\"]\n",
+        "base_model_url = f\"https://huggingface.co/BlinkDL/{base_model_name.lower()}\"\n",
+        "\n",
+        "# This may take a while\n",
+        "!git lfs clone $base_model_url\n",
+        "\n",
+        "from glob import glob\n",
+        "\n",
+        "# Sorted so the same checkpoint is chosen on every run when several files match.\n",
+        "base_model_candidates = sorted(glob(f\"{base_model_name.lower()}/{base_model_name}*.pth\"))\n",
+        "if not base_model_candidates:\n",
+        "    # Fail with a readable message instead of a bare IndexError.\n",
+        "    raise FileNotFoundError(\n",
+        "        f\"No {base_model_name}*.pth checkpoint found in {base_model_name.lower()}/ - \"\n",
+        "        \"check that the download above succeeded\"\n",
+        "    )\n",
+        "base_model_path = base_model_candidates[0]\n",
+        "\n",
+        "print(f\"Using {base_model_path} as base\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "hCOPnLelfJgP"
+      },
+      "source": [
+        "## Generate Training Data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "wW5OmlXmvaIU",
+        "cellView": "form"
+      },
+      "outputs": [],
+      "source": [
+        "#@title Training Data Options\n",
+        "#@markdown `input_file` should be the path to a single file that contains the text you want to fine-tune with.\n",
+        "#@markdown Either upload a file to this notebook instance or reference a file in your Google Drive.\n",
+        "\n",
+        "import numpy as np\n",
+        "from transformers import PreTrainedTokenizerFast\n",
+        "\n",
+        "tokenizer = PreTrainedTokenizerFast(tokenizer_file=f'{repo_dir}/20B_tokenizer.json')\n",
+        "\n",
+        "input_file = \"/content/drive/MyDrive/training.txt\" #@param {type:\"string\"}\n",
+        "output_file = 'train.npy'\n",
+        "\n",
+        "print(f'Tokenizing {input_file} (VERY slow. please wait)')\n",
+        "\n",
+        "# Context manager so the file handle is closed as soon as the text is read.\n",
+        "with open(input_file, encoding=\"utf-8\") as input_handle:\n",
+        "    data_raw = input_handle.read()\n",
+        "print(f'Raw length = {len(data_raw)}')\n",
+        "\n",
+        "data_code = tokenizer.encode(data_raw)\n",
+        "print(f'Tokenized length = {len(data_code)}')\n",
+        "\n",
+        "# Token ids are stored as uint16 below, so refuse ids that would not fit\n",
+        "# rather than risk them being silently altered by the conversion.\n",
+        "if data_code and max(data_code) > np.iinfo('uint16').max:\n",
+        "    raise ValueError('Token id too large to be stored as uint16')\n",
+        "\n",
+        "out = np.array(data_code, dtype='uint16')\n",
+        "np.save(output_file, out, allow_pickle=False)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "I4lz-3maeIwY"
+      },
+      "source": [
+        "## Training"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "fuCw5_ASwMud"
+      },
+      "outputs": [],
+      "source": [
+        "#@title Training Options { display-mode: \"form\" }\n",
+        "from shutil import copy\n",
+        "import os\n",
+        "\n",
+        "def training_options():\n",
+        "    \"\"\"Return the train.py overrides as a {variable name: value} dict.\n",
+        "\n",
+        "    The dict is built with locals(), so every local variable defined here\n",
+        "    becomes an override key - do not add helper variables to this function.\n",
+        "    \"\"\"\n",
+        "    EXPRESS_PILE_MODE = True\n",
+        "    # splitext removes only the final extension, so a dot elsewhere in the\n",
+        "    # path (directory or model name) no longer truncates the model name.\n",
+        "    EXPRESS_PILE_MODEL_NAME = os.path.splitext(base_model_path)[0]\n",
+        "    EXPRESS_PILE_MODEL_TYPE = base_model_name\n",
+        "    n_epoch = 100 #@param {type:\"integer\"}\n",
+        "    epoch_save_frequency = 25 #@param {type:\"integer\"}\n",
+        "    batch_size = 11 #@param {type:\"integer\"}\n",
+        "    ctx_len = 384 #@param {type:\"integer\"}\n",
+        "    epoch_save_path = f\"{output_path}/{tuned_model_name}\"\n",
+        "    return locals()\n",
+        "\n",
+        "def model_options():\n",
+        "    \"\"\"Return the src/model.py overrides as a {variable name: value} dict.\"\"\"\n",
+        "    T_MAX = 384 #@param {type:\"integer\"}\n",
+        "    return {\"T_MAX\": T_MAX}\n",
+        "\n",
+        "def env_vars():\n",
+        "    \"\"\"Return overrides for the os.environ[...] assignment lines of train.py.\n",
+        "\n",
+        "    Keys are the literal assignment targets as they are matched in the file\n",
+        "    (for example os.environ['RWKV_FLOAT_MODE']); values are the new strings.\n",
+        "    \"\"\"\n",
+        "    # 'bf32' is not a real float mode and was removed from the choices.\n",
+        "    # NOTE(review): 'tf32' and 'fp32' are the 32-bit modes named in upstream train.py - confirm.\n",
+        "    RWKV_FLOAT_MODE = 'fp16' #@param ['fp16', 'bf16', 'tf32', 'fp32'] {type:\"string\"}\n",
+        "    RWKV_DEEPSPEED = '0' #@param ['0', '1'] {type:\"string\"}\n",
+        "    # Built explicitly instead of from locals() so the result cannot pick up\n",
+        "    # unrelated local names.\n",
+        "    return {\n",
+        "        \"os.environ['RWKV_FLOAT_MODE']\": RWKV_FLOAT_MODE,\n",
+        "        \"os.environ['RWKV_DEEPSPEED']\": RWKV_DEEPSPEED,\n",
+        "    }\n",
+        "\n",
+        "def replace_lines(file_name, to_replace):\n",
+        "    \"\"\"Rewrite simple `name = value` assignments in a source file, in place.\n",
+        "\n",
+        "    A line is replaced when its text before the first ' =' (surrounding\n",
+        "    whitespace ignored) is a key of `to_replace`. The replacement keeps the\n",
+        "    original indentation and drops everything after the '=' (including any\n",
+        "    trailing comment). All other lines are copied through unchanged.\n",
+        "\n",
+        "    Args:\n",
+        "        file_name: path of the file to edit.\n",
+        "        to_replace: dict mapping assignment-target text to its new value.\n",
+        "    \"\"\"\n",
+        "    with open(file_name, 'r', encoding='utf-8') as f:\n",
+        "        lines = f.readlines()\n",
+        "    unmatched = set(to_replace)\n",
+        "    tmp_name = f'{file_name}.tmp'\n",
+        "    with open(tmp_name, 'w', encoding='utf-8') as f:\n",
+        "        for line in lines:\n",
+        "            key = line.split(\" =\")[0]\n",
+        "            name = key.strip()\n",
+        "            if name in to_replace:\n",
+        "                # repr() gives a valid Python literal for every value type,\n",
+        "                # including strings that contain quotes or backslashes.\n",
+        "                f.write(f'{key} = {to_replace[name]!r}\\n')\n",
+        "                unmatched.discard(name)\n",
+        "            else:\n",
+        "                f.write(line)\n",
+        "    # Swap the rewritten file into place in a single step.\n",
+        "    os.replace(tmp_name, file_name)\n",
+        "    if unmatched:\n",
+        "        # An option that matched nothing would otherwise be ignored silently.\n",
+        "        print(f'Warning: no assignment found in {file_name} for {sorted(unmatched)}')\n",
+        "\n",
+        "# train.py receives the training options together with the environment\n",
+        "# settings; src/model.py receives the model options.\n",
+        "values = {**training_options(), **env_vars()}\n",
+        "replace_lines('train.py', values)\n",
+        "replace_lines('src/model.py', model_options())"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!python train.py "
+      ],
+      "metadata": {
+        "id": "0ZSF8U-nzylI"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [],
+      "metadata": {
+        "id": "pcDci4O7xJiZ"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ],
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "name": "RWKV-v4-RNN-Pile Fine-Tuning",
+      "provenance": [],
+      "toc_visible": true
+    },
+    "gpuClass": "standard",
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}