{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "Vx7KFfeieD7z"
   },
   "source": [
    "# RWKV-v4-RNN-Pile Fine-Tuning\n",
    "\n",
    "[RWKV](https://github.com/BlinkDL/RWKV-LM) is an RNN with transformer-level performance.\n",
    "\n",
    "\n",
    "This notebook aims to streamline fine-tuning RWKV-v4 models."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "7JFIiAsrfvJy"
   },
   "source": [
    "\n",
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "g_qFjgYmtSfK"
   },
   "outputs": [],
   "source": [
    "#@title Google Drive Options { display-mode: \"form\" }\n",
    "save_models_to_drive = True #@param {type:\"boolean\"}\n",
    "drive_mount = '/content/drive' #@param {type:\"string\"}\n",
    "output_dir = 'rwkv-v4-rnn-pile-tuning' #@param {type:\"string\"}\n",
    "tuned_model_name = 'tuned' #@param {type:\"string\"}\n",
    "\n",
    "import os\n",
    "\n",
    "if save_models_to_drive:\n",
    "    # Imported only when needed so the cell does not require google.colab when Drive saving is off.\n",
    "    from google.colab import drive\n",
    "    drive.mount(drive_mount)\n",
    "\n",
    "# Models go to Google Drive when requested, otherwise to the local /content directory.\n",
    "output_path = f\"{drive_mount}/MyDrive/{output_dir}\" if save_models_to_drive else f\"/content/{output_dir}\"\n",
    "os.makedirs(f\"{output_path}/{tuned_model_name}\", exist_ok=True)\n",
    "os.makedirs(f\"{output_path}/base_models/\", exist_ok=True)\n",
    "\n",
    "print(f\"Saving models to {output_path}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "eivKJ6FP1_9z",
    "outputId": "a687e3ad-8158-492a-da86-4f4ed8804699",
    "colab": {
     "base_uri": "https://localhost:8080/"
    }
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "Fri Sep 2 16:11:37 2022 \n",
      "+-----------------------------------------------------------------------------+\n",
      "| NVIDIA-SMI 460.32.03 Driver Version: 460.32.03 CUDA Version: 11.2 |\n",
      "|-------------------------------+----------------------+----------------------+\n",
      "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
      "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n",
      "| | | MIG M. 
|\n",
      "|===============================+======================+======================|\n",
      "| 0 Tesla P100-PCIE... Off | 00000000:00:04.0 Off | 0 |\n",
      "| N/A 35C P0 28W / 250W | 0MiB / 16280MiB | 0% Default |\n",
      "| | | N/A |\n",
      "+-------------------------------+----------------------+----------------------+\n",
      " \n",
      "+-----------------------------------------------------------------------------+\n",
      "| Processes: |\n",
      "| GPU GI CI PID Type Process name GPU Memory |\n",
      "| ID ID Usage |\n",
      "|=============================================================================|\n",
      "| No running processes found |\n",
      "+-----------------------------------------------------------------------------+\n"
     ]
    }
   ],
   "source": [
    "!nvidia-smi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "R4lt0FTegJw9"
   },
   "outputs": [],
   "source": [
    "!git clone https://github.com/blinkdl/RWKV-LM\n",
    "repo_dir = \"/content/RWKV-LM/RWKV-v4\"\n",
    "%cd $repo_dir"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "RDavUrBsgKIV"
   },
   "outputs": [],
   "source": [
    "!pip install transformers pytorch-lightning==1.9 deepspeed wandb ninja"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "Wt7y7vR6e6U3"
   },
   "source": [
    "## Load Base Model\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "KIgagN-Se3wi"
   },
   "outputs": [],
   "source": [
    "#@title Base Model Options\n",
    "#@markdown Using any of the listed options will download the checkpoint from huggingface\n",
    "\n",
    "base_model_name = \"RWKV-4-Pile-169M\" #@param [\"RWKV-4-Pile-1B5\", \"RWKV-4-Pile-430M\", \"RWKV-4-Pile-169M\"]\n",
    "base_model_url = f\"https://huggingface.co/BlinkDL/{base_model_name.lower()}\"\n",
    "\n",
    "# This may take a while\n",
    "!git lfs clone $base_model_url\n",
    "\n",
    "from glob import glob\n",
    "\n",
    "# Sort so the pick is deterministic, and fail with a readable message (instead of an\n",
    "# IndexError) when the download above did not produce a checkpoint.\n",
    "checkpoints = sorted(glob(f\"{base_model_name.lower()}/{base_model_name}*.pth\"))\n",
    "if not checkpoints:\n",
    "    raise FileNotFoundError(\n",
    "        f\"No {base_model_name}*.pth checkpoint found in {base_model_name.lower()}/ - did the download succeed?\"\n",
    "    )\n",
    "base_model_path = checkpoints[0]\n",
    "\n",
    "print(f\"Using {base_model_path} as base\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "hCOPnLelfJgP"
   },
   "source": [
    "## Generate Training Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "wW5OmlXmvaIU",
    "cellView": "form"
   },
   "outputs": [],
   "source": [
    "#@title Training Data Options\n",
    "#@markdown `input_file` should be the path to a single file that contains the text you want to fine-tune with.\n",
    "#@markdown Either upload a file to this notebook instance or reference a file in your Google drive.\n",
    "\n",
    "import numpy as np\n",
    "from transformers import PreTrainedTokenizerFast\n",
    "\n",
    "tokenizer = PreTrainedTokenizerFast(tokenizer_file=f'{repo_dir}/20B_tokenizer.json')\n",
    "\n",
    "input_file = \"/content/drive/MyDrive/training.txt\" #@param {type:\"string\"}\n",
    "output_file = 'train.npy'\n",
    "\n",
    "print(f'Tokenizing {input_file} (VERY slow. please wait)')\n",
    "\n",
    "# Context manager so the file handle is closed as soon as the text is read.\n",
    "with open(input_file, encoding=\"utf-8\") as f:\n",
    "    data_raw = f.read()\n",
    "print(f'Raw length = {len(data_raw)}')\n",
    "\n",
    "data_code = tokenizer.encode(data_raw)\n",
    "print(f'Tokenized length = {len(data_code)}')\n",
    "\n",
    "# Token ids are stored as uint16 in train.npy.\n",
    "out = np.array(data_code, dtype='uint16')\n",
    "np.save(output_file, out, allow_pickle=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "I4lz-3maeIwY"
   },
   "source": [
    "## Training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "fuCw5_ASwMud"
   },
   "outputs": [],
   "source": [
    "#@title Training Options { display-mode: \"form\" }\n",
    "from shutil import copy\n",
    "import os\n",
    "\n",
    "def training_options():\n",
    "    \"\"\"Return the train.py variables to overwrite as a {name: value} dict (the function's locals).\"\"\"\n",
    "    EXPRESS_PILE_MODE = True\n",
    "    # splitext removes only the trailing extension, so a dot elsewhere in the path cannot truncate the name.\n",
    "    EXPRESS_PILE_MODEL_NAME = os.path.splitext(base_model_path)[0]\n",
    "    EXPRESS_PILE_MODEL_TYPE = base_model_name\n",
    "    n_epoch = 100 #@param {type:\"integer\"}\n",
    "    epoch_save_frequency = 25 #@param {type:\"integer\"}\n",
    "    batch_size = 11 #@param {type:\"integer\"}\n",
    "    ctx_len = 384 #@param {type:\"integer\"}\n",
    "    epoch_save_path = f\"{output_path}/{tuned_model_name}\"\n",
    "    return locals()\n",
    "\n",
    "def model_options():\n",
    "    \"\"\"Return the src/model.py variables to overwrite as a {name: value} dict (the function's locals).\"\"\"\n",
    "    T_MAX = 384 #@param {type:\"integer\"}\n",
    "    return locals()\n",
    "\n",
    "def env_vars():\n",
    "    \"\"\"Return the os.environ assignments to overwrite in train.py, keyed by their left-hand side text.\"\"\"\n",
    "    RWKV_FLOAT_MODE = 'fp16' #@param ['fp16', 'bf16', 'fp32'] {type:\"string\"}\n",
    "    RWKV_DEEPSPEED = '0' #@param ['0', '1'] {type:\"string\"}\n",
    "    return {f\"os.environ['{key}']\": value for key, value in locals().items()}\n",
    "\n",
    "def replace_lines(file_name, to_replace):\n",
    "    \"\"\"Rewrite assignment lines of file_name in place.\n",
    "\n",
    "    A line is replaced when the text before its first \" =\" (ignoring surrounding\n",
    "    whitespace) is a key of to_replace; the new line keeps the original leading\n",
    "    whitespace and assigns the mapped value. All other lines are copied unchanged.\n",
    "    \"\"\"\n",
    "    with open(file_name, 'r') as f:\n",
    "        lines = f.readlines()\n",
    "    with open(f'{file_name}.tmp', 'w') as f:\n",
    "        for line in lines:\n",
    "            key = line.split(\" =\")[0]\n",
    "            name = key.strip()\n",
    "            if name in to_replace:\n",
    "                # !r emits a valid Python literal: strings are quoted and escaped, numbers and booleans are written as-is.\n",
    "                f.write(f'{key} = {to_replace[name]!r}\\n')\n",
    "            else:\n",
    "                f.write(line)\n",
    "    copy(f'{file_name}.tmp', file_name)\n",
    "    os.remove(f'{file_name}.tmp')\n",
    "\n",
    "values = training_options()\n",
    "values.update(env_vars())\n",
    "replace_lines('train.py', values)\n",
    "replace_lines('src/model.py', model_options())"
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "!python train.py "
   ],
   "metadata": {
    "id": "0ZSF8U-nzylI"
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [],
   "metadata": {
    "id": "pcDci4O7xJiZ"
   },
   "execution_count": null,
   "outputs": []
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "name": "RWKV-v4-RNN-Pile Fine-Tuning",
   "provenance": [],
   "toc_visible": true
  },
  "gpuClass": "standard",
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}