File size: 7,351 Bytes
4ca4e8f fa3019e 4ca4e8f 608be6d 4ca4e8f fa3019e 4ca4e8f fa3019e 4ca4e8f fa3019e 21ff782 fa3019e 4ca4e8f 67e3618 4ca4e8f 6486f13 9307bce 6486f13 9307bce 6486f13 67e3618 4ca4e8f 67e3618 4ca4e8f 67e3618 4ca4e8f adefdbb 4ca4e8f adefdbb 4ca4e8f 157ee7d 4ca4e8f adefdbb dc20c83 4ca4e8f aa9c683 67e3618 aa9c683 4ca4e8f 67e3618 4ca4e8f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
#usually it's what is on the inside that counts, not this time. This script is a mess, but at least it works.
#import required modules
import glob
import os
import shutil
import subprocess
import sys
import time

from huggingface_hub import login, logout, get_token, whoami, repo_exists
#define os differences
# One row per supported os.name, in the order:
# (clear screen, move, recursive delete, copy, venv interpreter, path separator)
_PLATFORM_SETTINGS = {
    'nt': ('cls', 'move', 'rmdir /s /q', 'copy', 'venv\\scripts\\python.exe', '\\'),
    'posix': ('clear', 'mv', 'rm -rf', 'cp', './venv/bin/python', '/'),
}
oname = os.name
if oname not in _PLATFORM_SETTINGS:
    # no command set is defined for anything other than 'nt' and 'posix'
    sys.exit('This script is not compatible with your machine.')
osclear, osmv, osrmd, oscp, pyt, slsh = _PLATFORM_SETTINGS[oname]
def clear_screen():
    """Clear the terminal by running the platform clear command held in ``osclear``."""
    clear_command = osclear
    os.system(clear_command)
#get token
# Sets `tfound`, which the end of the script reads:
#   'true'  - an existing token was accepted
#   'false' - the user typed a new token during this run and it was accepted
#   any other value - no new login happened (skipped or logged out)
if os.environ.get('HF_TOKEN') is not None:
    # validate the environment-provided token up front and stop with guidance if it is rejected
    try:
        login(get_token())
    except ValueError:
        print("You have an invalid token set in your environment variable HF_TOKEN. This will cause issues with this script\nRemove the variable or set it to a valid token.")
        sys.exit("Exiting...")
saved_token = get_token()
if saved_token is not None:
    tfound = 'true'
    #if the token is found in either HF_TOKEN or cli login then log in:
    try:
        login(saved_token)
    except ValueError:
        tfound = 'false'
        try:
            login(input("API token is no longer valid. Enter your new HuggingFace token (empty to logout): "))
        except Exception:
            # Exception rather than a bare except: a rejected/empty token still falls
            # through to logout, but Ctrl-C at the prompt now aborts the script
            logout()
            print("Logging out... (Unable to access private or gated models)")
            tfound = 'false but logged out'
            time.sleep(3)
else:
    #if the token is not found then prompt user to provide it:
    tfound = "false"
    try:
        login(input("API token not detected. Enter your HuggingFace token (empty to skip): "))
    except Exception:
        # same reasoning as above: only real login failures count as "skip"
        print("Skipping login... (Unable to access private or gated models)")
        tfound = "false but skipped" #doesn't matter what this is, only 'false' is used
        time.sleep(3)
clear_screen()
#get original model repo url
# surrounding whitespace from a copy/paste would otherwise make the lookup fail
repo_url = input("Enter unquantized model repository (User/Repo): ").strip()
#look for repo
try:
    repo_found = repo_exists(repo_url)
except ValueError:
    # huggingface_hub signals a malformed id (e.g. empty input) with a ValueError
    # subclass; report it the same way as a repo that does not exist
    repo_found = False
if not repo_found:
    print(f"Model repo doesn't exist at https://huggingface.co/{repo_url}")
    sys.exit("Exiting...")
# `model` names the local directory under models/, `modelname` names the output folders
model = repo_url.replace("/", "_")
# [-1] instead of [1]: identical for "User/Repo", and no IndexError for an id without a slash
modelname = repo_url.split("/")[-1]
clear_screen()
#ask for number of quants
# re-prompt instead of crashing with a traceback when the answer is not an integer
while True:
    try:
        qmount = int(input("Enter the number of quants you want to create: "))
        break
    except ValueError:
        print("Please enter a whole number.")
qmount += 1 #kept one past the count so range(1, qmount) numbers the quants from 1
clear_screen()
#save bpw values
print(f"Type the BPW for the following {qmount - 1} quants. Recommend staying over 2.4 BPW. Use the vram calculator to find the best BPW values: https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator")
qnum = {}
for i in range(1, qmount):
    # a typo on one value must not throw away the values already entered
    while True:
        try:
            qnum[f"bpw{i}"] = float(input(f"Enter BPW for quant {i} (2.00-8.00): ")) #convert input to float for proper sorting
            break
        except ValueError:
            print("Please enter a number, for example 4.25.")
clear_screen()
#collect all values and sort them from smallest to largest
bpwvalue = sorted(qnum.values())
#ask to delete fp16 after done
# an empty answer selects the default, 'n'; anything other than 'y'/'n' is asked again
delmodel = input("Do you want to delete the original model? (Won't delete if paused or failed) (y/N): ") or 'n'
while delmodel not in ('y', 'n'):
    delmodel = input("Please enter 'y' or 'n': ") or 'n'
if delmodel == 'y':
    print(f"Deleting dir models/{model} after quants are finished.")
    time.sleep(3)
clear_screen()
#downloading the model
# The "converted-st" marker is written by the safetensors conversion step of this
# script; when it is present the download is skipped.
if not os.path.exists(os.path.join("models", model, "converted-st")):
    # argv list instead of a shell string: the user-typed repo id is passed as a
    # single argument and never parsed by a shell
    result = subprocess.run([pyt, "download-model.py", repo_url]) #download model from hf (Credit to oobabooga for this script)
    if result.returncode != 0:
        print("Download failed.")
        sys.exit("Exiting...")
clear_screen()
#convert to safetensors if bin
if not glob.glob(f"models/{model}/*.safetensors"): #check if safetensors model exists
    convertst = input("Couldn't find safetensors model, do you want to convert to safetensors? (y/n): ")
    while convertst not in ('y', 'n'):
        convertst = input("Please enter 'y' or 'n': ")
    if convertst != 'y':
        sys.exit("Can't quantize a non-safetensors model. Exiting...")
    print("Converting weights to safetensors, please wait...")
    model_dir = os.path.join("models", model)
    converted_dir = f"{model_dir}-st"
    # argv list: no shell quoting issues with the paths
    result = subprocess.run([pyt, "convert-to-safetensors.py", model_dir, "--output", converted_dir]) #convert to safetensors (Credit to oobabooga for this script as well)
    if result.returncode != 0:
        print("Converting failed. Please look for a safetensors model or convert model manually.")
        sys.exit("Exiting...")
    # shutil instead of shelling out to rm/mv (rmdir/move on Windows);
    # ignore_errors mirrors the forgiving behaviour of `rm -rf`
    shutil.rmtree(model_dir, ignore_errors=True) #remove previous weights
    shutil.move(converted_dir, model_dir) #replace with safetensors
    # empty marker file: tells the download step that this model was already converted
    with open(os.path.join(model_dir, "converted-st"), 'w'):
        pass
    print("Finished converting")
clear_screen()
#start converting
# One convert.py run per requested BPW, smallest first. The measurement produced by
# the first run is saved under measurements/ and passed to later runs with -m.
model_dir = os.path.join("models", model)
measure_dir = os.path.join("measurements", f"{model}-measure")
measure_file = os.path.join(measure_dir, "measurement.json")
for bpw in bpwvalue:
    # checked on every pass because the first successful quant creates the file
    have_measurement = os.path.exists(measure_file)
    measure_args = ["-m", measure_file] if have_measurement else [] #skip measurement if it exists
    print(f"Starting quantization for BPW {bpw}")
    work_dir = f"{model}-exl2-{bpw}bpw-WD"
    out_dir = os.path.join(f"{modelname}-exl2-quants", f"{modelname}-exl2-{bpw}bpw")
    os.makedirs(work_dir, exist_ok=True) #create working directory
    os.makedirs(out_dir, exist_ok=True) #create compile full directory
    try:
        shutil.copy(os.path.join(model_dir, "config.json"), work_dir) #copy config to working directory
    except OSError as err:
        # the shell copy this replaces was unchecked as well; keep going and let the
        # return-code check below decide whether the run failed
        print(f"Warning: could not copy config.json to {work_dir}: {err}")
    #more settings exist in the convert.py script, to view them go to docs/convert.md or https://github.com/turboderp/exllamav2/blob/master/doc/convert.md
    # argv list instead of a shell string: every path is passed as one argument
    result = subprocess.run([
        pyt, "exllamav2/convert.py",
        "-i", model_dir,
        "-o", work_dir,
        "-cf", out_dir,
        "-b", str(bpw),
        *measure_args,
        "-hb", "8",
    ]) #run quantization and exit if failed (Credit to turbo for his dedication to exl2)
    if result.returncode != 0:
        print("Quantization failed.")
        sys.exit("Exiting...")
    if not have_measurement:
        os.makedirs(measure_dir, exist_ok=True) #create measurement directory
        try:
            shutil.copy(os.path.join(work_dir, "measurement.json"), measure_dir) #copy measurement to measure directory
        except OSError as err:
            # losing the cached measurement only means the next quant has to measure again
            print(f"Warning: could not save measurement.json to {measure_dir}: {err}")
        # empty file whose name is the reminder shown to the user
        with open(os.path.join(measure_dir, "Delete folder when no more quants are needed from this model"), 'w'):
            pass
    shutil.rmtree(work_dir, ignore_errors=True) #remove working directory
# if chose to delete model at the beginning, delete the model
if delmodel == 'y':
    # shutil instead of a shell `rm -rf` / `rmdir /s /q`, and only claim success
    # when the directory was actually removed
    try:
        shutil.rmtree(os.path.join("models", model))
    except OSError as err:
        print(f"Could not delete models/{model}: {err}")
    else:
        print(f"Deleted models/{model}")
#if new sign in, tell user
# tfound is exactly 'false' only when a token typed during this run was accepted
if tfound == 'false':
    print(f'''
You are now logged in as {whoami().get('fullname', None)}.
To logout, use the hf command line interface 'huggingface-cli logout'
To view your active account, use 'huggingface-cli whoami'
''')
print("Finished quantizing. Exiting...")
|