File size: 13,840 Bytes

{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "cellView": "form",
        "id": "7PhL3HkpFeU7"
      },
      "outputs": [],
      "source": [
        "#@title Setup environment\n",
        "#@markdown Takes about 15 minutes to finish\n",
        "# download exllamav2 and the two helper scripts from text-generation-webui\n",
        "!git clone https://github.com/turboderp/exllamav2\n",
        "!wget https://raw.githubusercontent.com/oobabooga/text-generation-webui/main/convert-to-safetensors.py\n",
        "!wget https://raw.githubusercontent.com/oobabooga/text-generation-webui/main/download-model.py\n",
        "# %pip (instead of !pip) installs into the environment of the running kernel\n",
        "%pip install -r exllamav2/requirements.txt\n",
        "%pip install huggingface-hub transformers accelerate --upgrade\n",
        "%pip install ./exllamav2"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "cellView": "form",
        "id": "CXbUzOmNHyff"
      },
      "outputs": [],
      "source": [
        "#@title Login to Huggingface - Required\n",
        "#log in to the Hugging Face Hub and make sure the active token has the 'write' role\n",
        "import os\n",
        "import sys\n",
        "from getpass import getpass #hidden prompt so a typed token is not echoed into the saved cell output\n",
        "from huggingface_hub import login, get_token, whoami\n",
        "\n",
        "#get token\n",
        "if os.environ.get('KAGGLE_KERNEL_RUN_TYPE', None) is not None: #check if user in kaggle\n",
        "    from kaggle_secrets import UserSecretsClient # type: ignore\n",
        "    from kaggle_web_client import BackendError # type: ignore\n",
        "    try:\n",
        "        login(UserSecretsClient().get_secret(\"HF_TOKEN\")) #login if token secret found\n",
        "    except BackendError:\n",
        "        print('''\n",
        "            When using Kaggle, make sure to use the secret key HF_TOKEN with a 'WRITE' token.\n",
        "                   This will prevent the need to login every time you run the script.\n",
        "                   Set your secrets with the secrets add-on on the top of the screen.\n",
        "             ''')\n",
        "if get_token() is not None:\n",
        "    #if the token is found then log in:\n",
        "    login(get_token())\n",
        "else:\n",
        "    #if the token is not found then prompt user to provide it (input is hidden):\n",
        "    login(getpass(\"API token not detected. Enter your HuggingFace (WRITE) token: \"))\n",
        "\n",
        "#keep asking until the active token reports the 'write' role (Only required if user needs a WRITE token, remove if READ is enough):\n",
        "while whoami().get('auth', {}).get('accessToken', {}).get('role', None) != 'write':\n",
        "    if os.environ.get('HF_TOKEN', None) is not None: #if environ finds HF_TOKEN as read-only then display following text and exit:\n",
        "        print('''\n",
        "          You have the environment variable HF_TOKEN set.\n",
        "          You cannot log in.\n",
        "          Either set the environment variable to a 'WRITE' token or remove it.\n",
        "                  ''')\n",
        "        sys.exit(\"Exiting...\")\n",
        "    if os.environ.get('COLAB_BACKEND_VERSION', None) is not None: #a read-only Colab secret cannot be replaced from here, so stop\n",
        "        print('''\n",
        "                              Your Colab secret key is read-only\n",
        "                Please switch your key to 'write' or disable notebook access on the left.\n",
        "                  ''')\n",
        "        sys.exit(\"Stuck in a loop, exiting...\")\n",
        "    elif os.environ.get('KAGGLE_KERNEL_RUN_TYPE', None) is not None: #on Kaggle only warn, then fall through to the prompt below\n",
        "        print('''\n",
        "                                      Your Kaggle secret key is read-only\n",
        "                Please switch your key to 'write' or unattach from notebook in add-ons at the top.\n",
        "                          Having a read-only key attached will require login every time.\n",
        "                ''')\n",
        "    print(\"You do not have write access to this repository. Please use a valid token with (WRITE) access.\")\n",
        "    login(getpass(\"Enter your HuggingFace (WRITE) token: \")) #the loop condition re-checks the new token\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "cellView": "form",
        "id": "dxKEA7obHLoO"
      },
      "outputs": [],
      "source": [
        "#@title Start quant\n",
        "#@markdown ### Using subprocess to execute scripts doesn't output on Colab. If something seems frozen, please wait. Any detected errors will automatically stop Colab\n",
        "#import required modules\n",
        "import glob\n",
        "import os\n",
        "import shutil\n",
        "import subprocess\n",
        "import sys\n",
        "\n",
        "from huggingface_hub import repo_exists, upload_folder, create_repo, upload_file, create_branch\n",
        "\n",
        "#define os differences\n",
        "#per-platform shell commands, python interpreter path and path separator\n",
        "oname = os.name\n",
        "if oname not in ('nt', 'posix'):\n",
        "    sys.exit('This script is not compatible with your machine.')\n",
        "if oname == 'nt':\n",
        "    osmv, osrmd, oscp = 'move', 'rmdir /s /q', 'copy'\n",
        "    pyt, slsh = 'venv\\\\scripts\\\\python.exe', '\\\\'\n",
        "else:\n",
        "    osmv, osrmd, oscp = 'mv', 'rm -r', 'cp'\n",
        "    pyt, slsh = 'python', '/'\n",
        "\n",
        "#get original model repo url\n",
        "#@markdown Enter unquantized model repository (User/Repo):\n",
        "repo_url = \"mistralai/Mistral-7B-Instruct-v0.2\" # @param {type:\"string\"}\n",
        "\n",
        "#look for repo\n",
        "if not repo_exists(repo_url):\n",
        "    print(f\"Model repo doesn't exist at https://huggingface.co/{repo_url}\")\n",
        "    sys.exit(\"Exiting...\")\n",
        "model = repo_url.replace(\"/\", \"_\") #folder name used under models/\n",
        "modelname = repo_url.split(\"/\")[-1] #last path component; [-1] also works for repo ids that have no User/ prefix\n",
        "print(\"\\n\\n\")\n",
        "\n",
        "#ask for number of quants\n",
        "#@markdown Enter the number of quants you want to create:\n",
        "quant_amount = \"5\" # @param {type:\"string\"}\n",
        "qmount = int(quant_amount)\n",
        "qmount += 1 #exclusive upper bound for the 1-based range below\n",
        "\n",
        "#save bpw values\n",
        "#@markdown You will be asked the BPW values after running this section.\n",
        "print(f\"Type the BPW for the following {qmount - 1} quants. Recommend staying over 2.4 BPW. Use the vram calculator to find the best BPW values: https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator\")\n",
        "qnum = {}\n",
        "for i in range(1, qmount):\n",
        "    while True: #ask again on a typo instead of crashing the whole cell with ValueError\n",
        "        try:\n",
        "            qnum[f\"bpw{i}\"] = float(input(f\"Enter BPW for quant {i} (2.00-8.00): \")) #convert input to float for proper sorting\n",
        "            break\n",
        "        except ValueError:\n",
        "            print(\"Invalid BPW, please enter a number such as 4.25\")\n",
        "print(\"\\n\\n\")\n",
        "\n",
        "#collect all values in a list sorted from smallest to largest\n",
        "bpwvalue = sorted(qnum.values())\n",
        "\n",
        "#download the model unless an earlier run already converted it to safetensors\n",
        "if not os.path.exists(f\"models{slsh}{model}{slsh}converted-st\"): #check if model was converted to safetensors, skip download if it was\n",
        "    print(\"Starting download...\")\n",
        "    #argument list without a shell: repo_url is user input and must not be interpreted by a shell\n",
        "    result = subprocess.run([pyt, \"download-model.py\", repo_url]) #download model from hf (Credit to oobabooga for this script)\n",
        "    if result.returncode != 0:\n",
        "        print(\"Download failed.\")\n",
        "        sys.exit(\"Exiting...\")\n",
        "    print(\"Download finished\\n\\n\")\n",
        "\n",
        "if not glob.glob(f\"models/{model}/*.safetensors\"): #check if safetensors model exists, if not try converting\n",
        "    print(\"Converting weights to safetensors, please wait...\")\n",
        "    result = subprocess.run([pyt, \"convert-to-safetensors.py\", f\"models{slsh}{model}\", \"--output\", f\"models{slsh}{model}-st\", \"--max-shard-size\", \"1GB\", \"--bf16\"]) #convert to safetensors (Credit to oobabooga for this script as well)\n",
        "    if result.returncode != 0:\n",
        "        print(\"Converting failed. Please look for a safetensors/bin model.\")\n",
        "        sys.exit(\"Exiting...\")\n",
        "    #replace the original weights with the converted ones; shutil raises if a step fails,\n",
        "    #whereas the unchecked shell rm/mv could leave the -st folder nested inside the old one\n",
        "    shutil.rmtree(f\"models{slsh}{model}\")\n",
        "    shutil.move(f\"models{slsh}{model}-st\", f\"models{slsh}{model}\")\n",
        "    with open(f\"models{slsh}{model}{slsh}converted-st\", 'w'): #empty marker file checked at the top of this section\n",
        "        pass\n",
        "    print(\"Finished converting\")\n",
        "    print(\"\\n\\n\")\n",
        "\n",
        "#create new repo if one doesn't already exist\n",
        "username = whoami().get('name', None) #look the account name up once; every whoami() call is a request to the Hub\n",
        "repo_id = f\"{username}/{modelname}-exl2\"\n",
        "if not repo_exists(repo_id):\n",
        "    print(\"Creating model repository...\")\n",
        "    create_repo(repo_id, private=True)\n",
        "    print(f\"Created repo at https://huggingface.co/{repo_id}\") #notify user of repo creation\n",
        "\n",
        "    #create the markdown file\n",
        "    print(\"Writing model card...\")\n",
        "    with open('./README.md', 'w') as file:\n",
        "        file.write(f\"# Exl2 quants for [{modelname}](https://huggingface.co/{repo_url})\\n\\n\")\n",
        "        file.write(\"## Automatically quantized using the auto quant from [hf-scripts](https://huggingface.co/anthonyg5005/hf-scripts)\\n\\n\")\n",
        "        file.write(f\"Would recommend {username} to change up this README to include more info.\\n\\n\")\n",
        "        file.write(\"### BPW:\\n\\n\")\n",
        "        for bpw in bpwvalue: #one link per quant, pointing at the branch that will hold it\n",
        "            file.write(f\"[{bpw}](https://huggingface.co/{repo_id}/tree/{bpw}bpw)\\n\\n\")\n",
        "    print(\"Created README.md\")\n",
        "\n",
        "    upload_file(path_or_fileobj=\"README.md\", path_in_repo=\"README.md\", repo_id=repo_id, commit_message=\"Add temp README\") #upload md file\n",
        "    print(\"Uploaded README.md to main\")\n",
        "else:\n",
        "    print(f\"WARNING: repo already exists at https://huggingface.co/{repo_id}\")\n",
        "\n",
        "#start converting\n",
        "repo_id = f\"{whoami().get('name', None)}/{modelname}-exl2\" #resolve the target repo once instead of calling whoami() for every use\n",
        "measurement = f\"{model}-measure{slsh}measurement.json\" #measurement saved from the first quant and reused by later ones\n",
        "for bpw in bpwvalue:\n",
        "    workdir = f\"{model}-exl2-{bpw}bpw-WD\"\n",
        "    outdir = f\"{model}-exl2-{bpw}bpw\"\n",
        "    if os.path.exists(measurement): # Check if measurement.json exists\n",
        "        cmdir = False\n",
        "        mskip = [\"-m\", measurement] #skip measurement if it exists\n",
        "    else:\n",
        "        cmdir = True\n",
        "        mskip = []\n",
        "    print(f\"Starting quantization for BPW {bpw}. Please wait, may take hours\")\n",
        "    os.makedirs(workdir, exist_ok=True) #create working directory\n",
        "    os.makedirs(outdir, exist_ok=True) #create compile full directory\n",
        "    shutil.copy(f\"models{slsh}{model}{slsh}config.json\", workdir) #copy config to working directory\n",
        "    #more settings exist in the convert.py script, to view them go to docs/convert.md or https://github.com/turboderp/exllamav2/blob/master/doc/convert.md\n",
        "    #argument list without a shell, so names derived from user input are never parsed by a shell\n",
        "    result = subprocess.run([pyt, \"exllamav2/convert.py\", \"-i\", f\"models/{model}\", \"-o\", workdir, \"-cf\", outdir, \"-b\", str(bpw), *mskip, \"-ss\", \"2048\"]) #run quantization and exit if failed (Credit to turbo for his dedication to exl2)\n",
        "    if result.returncode != 0:\n",
        "        print(\"Quantization failed.\")\n",
        "        sys.exit(\"Exiting...\")\n",
        "    print(f\"Done quantizing BPW {bpw}. Starting upload\")\n",
        "    if cmdir:\n",
        "        os.makedirs(f\"{model}-measure\", exist_ok=True) #create measurement directory\n",
        "        shutil.copy(f\"{workdir}{slsh}measurement.json\", f\"{model}-measure\") #copy measurement to measure directory\n",
        "        open(f\"{model}-measure/Delete folder when no more quants are needed from this model\", 'w').close()\n",
        "    try:\n",
        "        create_branch(repo_id, branch=f\"{bpw}bpw\") #create branch\n",
        "    except Exception as err: #still best-effort, but no longer traps KeyboardInterrupt/SystemExit and reports the real reason\n",
        "        print(f\"Could not create branch {bpw}bpw ({err}); it may already exist, trying upload...\")\n",
        "    upload_folder(folder_path=outdir, repo_id=repo_id, commit_message=f\"Add quant for BPW {bpw}\", revision=f\"{bpw}bpw\") #upload quantized model\n",
        "    shutil.rmtree(workdir, ignore_errors=True) #remove working directory (cleanup failures are ignored, as before)\n",
        "    shutil.rmtree(outdir, ignore_errors=True) #remove compile directory\n",
        "\n",
        "upload_file(path_or_fileobj=measurement, path_in_repo=\"measurement.json\", repo_id=repo_id, commit_message=\"Add measurement.json\") #upload measurement.json to main\n",
        "\n",
        "print(f'''Quants available at https://huggingface.co/{repo_id}\n",
        "      \\nRepo is private, go to https://huggingface.co/{repo_id}/settings to make public if you'd like.''')\n"
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "gpuType": "T4",
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}