File size: 9,004 Bytes

d303ab6
 
 
 
fa0da4d
 
 
d303ab6
 
 
 
 
fa0da4d
d303ab6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa3ba77
d303ab6
 
 
 
 
 
 
 
fa0da4d
 
 
 
 
 
d303ab6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa0da4d
d303ab6
 
 
fa0da4d
 
 
 
 
 
d303ab6
 
 
 
 
 
 
 
 
 
fa0da4d
d303ab6
 
 
fa0da4d
 
 
 
 
 
d303ab6
 
aa3ba77
d303ab6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa0da4d
d303ab6
 
 
 
 
fa0da4d
d303ab6
 
 
fa0da4d
 
 
 
 
 
d303ab6
 
 
 
 
 
 
 
 
 
fa0da4d
d303ab6
fa0da4d

{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Ku0ezvyD42ng"
      },
      "source": [
        "#Quantizing huggingface models to exl2\n",
        "This version of my exl2 quantize colab creates a single quantizaion to upload privatly.\\\n",
        "To calculate an estimate for VRAM size use: [NyxKrage/LLM-Model-VRAM-Calculator](https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator)\\\n",
        "Not all models and architectures are compatible with exl2."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "cellView": "form",
        "id": "G7zSk2LWHtPU"
      },
      "outputs": [],
      "source": [
        "#@title Download and install environment\n",
        "!git clone https://github.com/turboderp/exllamav2\n",
        "%cd exllamav2\n",
        "print(\"Installing pip dependencies\")\n",
        "!pip install -q -r requirements.txt\n",
        "!pip install -q huggingface_hub requests tqdm\n",
        "!pip install . -q\n",
        "#@markdown Uses [download-model.py](https://github.com/oobabooga/text-generation-webui/blob/main/download-model.py) by [oobabooga](https://github.com/oobabooga)\n",
        "!wget https://raw.githubusercontent.com/oobabooga/text-generation-webui/main/download-model.py\n",
        "model = \"none\"\n",
        "dsd = 'false'"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "cellView": "form",
        "id": "8Hl3fQmRLybp"
      },
      "outputs": [],
      "source": [
        "#@title Login to HF (Required to upload files)\n",
        "#@markdown From my Colab/Kaggle login script on [Anthonyg5005/hf-scripts](https://huggingface.co/Anthonyg5005/hf-scripts/blob/main/HF%20Login%20Snippet%20Kaggle.py)\n",
        "#import required functions\n",
        "import os\n",
        "from huggingface_hub import login, get_token, whoami\n",
        "\n",
        "#get token\n",
        "if os.environ.get('KAGGLE_KERNEL_RUN_TYPE', None) is not None: #check if user in kaggle\n",
        "    from kaggle_secrets import UserSecretsClient\n",
        "    from kaggle_web_client import BackendError\n",
        "    try:\n",
        "        login(UserSecretsClient().get_secret(\"HF_TOKEN\")) #login if token secret found\n",
        "    except BackendError:\n",
        "        print('''\n",
        "            When using Kaggle, make sure to use the secret key HF_TOKEN with a 'WRITE' token.\n",
        "                   This will prevent the need to login every time you run the script.\n",
        "                   Set your secrets with the secrets add-on on the top of the screen.\n",
        "             ''')\n",
        "if get_token() is not None:\n",
        "    #if the token is found then log in:\n",
        "    login(get_token())\n",
        "else:\n",
        "    #if the token is not found then prompt user to provide it:\n",
        "    login(input(\"API token not detected. Enter your HuggingFace (WRITE) token: \"))\n",
        "\n",
        "#if the token is read only then prompt user to provide a write token (Only required if user needs a WRITE token, remove if READ is enough):\n",
        "while True:\n",
        "    if whoami().get('auth', {}).get('accessToken', {}).get('role', None) != 'write':\n",
        "        if os.environ.get('HF_TOKEN', None) is not None: #if environ finds HF_TOKEN as read-only then display following text and exit:\n",
        "            print('''\n",
        "          You have the environment variable HF_TOKEN set.\n",
        "          You cannot log in.\n",
        "          Either set the environment variable to a 'WRITE' token or remove it.\n",
        "                  ''')\n",
        "            input(\"Press enter to continue.\")\n",
        "            exit()\n",
        "        if os.environ.get('COLAB_BACKEND_VERSION', None) is not None:\n",
        "            print('''\n",
        "                              Your Colab secret key is read-only\n",
        "                Please switch your key to 'write' or disable notebook access on the left.\n",
        "                               For now, you are stuck in a loop\n",
        "                  ''')\n",
        "        elif os.environ.get('KAGGLE_KERNEL_RUN_TYPE', None) is not None:\n",
        "            print('''\n",
        "                                      Your Kaggle secret key is read-only\n",
        "                Please switch your key to 'write' or unattach from notebook in add-ons at the top.\n",
        "                          Having a read-only key attched will require login every time.\n",
        "                ''')\n",
        "        print(\"You do not have write access to this repository. Please use a valid token with (WRITE) access.\")\n",
        "        login(input(\"Enter your HuggingFace (WRITE) token: \"))\n",
        "        continue\n",
        "    break"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "cellView": "form",
        "id": "NI1LUMD7H-Zx"
      },
      "outputs": [],
      "source": [
        "#@title ##Choose HF model to download\n",
        "#@markdown ###Repo should be formatted as user/repo\n",
        "#@markdown Weights must be stored in safetensors\n",
        "if model != \"none\":\n",
        "    !rm {model}-{BPW}bpw.zip\n",
        "    !rm -r {model}-exl2-{BPW}bpw\n",
        "repo_url = \"mistralai/Mistral-7B-Instruct-v0.2\" # @param {type:\"string\"}\n",
        "model = repo_url.replace(\"/\", \"_\")\n",
        "!python download-model.py {repo_url}"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "cellView": "form",
        "id": "8anbEbGyNmBI"
      },
      "outputs": [],
      "source": [
        "#@title Quantize the model\n",
        "#@markdown ###Quantization time will last based on model size\n",
        "#@markdown Target bits per weight:\n",
        "BPW = \"4.125\" # @param {type:\"string\"}\n",
        "!mkdir {model}-exl2-{BPW}bpw-WD\n",
        "!mkdir {model}-exl2-{BPW}bpw\n",
        "!cp models/{model}/config.json {model}-exl2-{BPW}bpw-WD\n",
        "#@markdown Calibrate with dataset, may improve model output (optional):\n",
        "Calibrate = True # @param {type:\"boolean\"}\n",
        "#@markdown Calibration dataset, enable calibrate above (must be parquet file):\n",
        "if Calibrate == True:\n",
        "    dataset_url = \"https://huggingface.co/datasets/wikitext/resolve/refs%2Fconvert%2Fparquet/wikitext-103-v1/test/0000.parquet?download=true\" # @param {type:\"string\"}\n",
        "    dataset_url = dataset_url.replace(\"?download=true\", \"\")\n",
        "    if dsd == 'false':\n",
        "        !wget {dataset_url}\n",
        "        dsd = 'true'\n",
        "    dataset = dataset_url.split(\"/\")[-1]\n",
        "#@markdown To use a calibration dataset, enter the huggingface resolve url. Right click the download button and copy the link. Afterwards, paste the link into dataset_url.\n",
        "#@markdown ![Example Image](https://huggingface.co/Anthonyg5005/hf-scripts/resolve/main/ipynb/dataset-example.jpg \"Copy from download button\")\n",
        "if Calibrate == True:\n",
        "    quant = f\"convert.py -i models/{model} -o {model}-exl2-{BPW}bpw-WD -cf {model}-exl2-{BPW}bpw -c {dataset} -b {BPW}\"\n",
        "else:\n",
        "    quant = f\"convert.py -i models/{model} -o {model}-exl2-{BPW}bpw-WD -cf {model}-exl2-{BPW}bpw -b {BPW}\"\n",
        "!python {quant}"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "cellView": "form",
        "id": "XORLS2uPrbma"
      },
      "outputs": [],
      "source": [
        "#@title Upload to huggingface privately\n",
        "#@markdown You may also set it to public but I'd recommend waiting for my next ipynb that will create mutliple quants and place them all into individual branches.\n",
        "!rm -r {model}-exl2-{BPW}bpw-WD\n",
        "!rm -r models/{model}\n",
        "print(\"Uploading to Huggingface. May take a while\")\n",
        "from huggingface_hub import HfApi, whoami, create_repo\n",
        "create_repo(f\"{whoami().get('name', None)}/{model}-exl2-{BPW}bpw\", private=True)\n",
        "HfApi().upload_folder(folder_path=f\"{model}-exl2-{BPW}bpw\", repo_id=f\"{whoami().get('name', None)}/{model}-exl2-{BPW}bpw\", repo_type=\"model\", commit_message=\"Upload from Colab automation\")\n",
        "print(f\"uploaded to {whoami().get('name', None)}/{model}-exl2-{BPW}bpw\")"
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "gpuType": "T4",
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}