File size: 7,351 Bytes
4ca4e8f
 
fa3019e
4ca4e8f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
608be6d
4ca4e8f
 
 
 
 
 
 
 
 
fa3019e
4ca4e8f
fa3019e
 
 
 
4ca4e8f
fa3019e
 
 
 
 
21ff782
fa3019e
 
 
 
 
 
 
4ca4e8f
 
 
 
 
 
 
67e3618
4ca4e8f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6486f13
9307bce
 
 
6486f13
 
9307bce
 
6486f13
 
 
 
 
67e3618
4ca4e8f
 
 
 
 
 
 
67e3618
4ca4e8f
 
 
 
 
 
 
 
 
 
67e3618
 
4ca4e8f
 
 
 
 
 
 
 
adefdbb
4ca4e8f
adefdbb
4ca4e8f
 
 
 
 
 
 
 
157ee7d
4ca4e8f
 
 
 
adefdbb
 
dc20c83
4ca4e8f
aa9c683
67e3618
aa9c683
 
 
4ca4e8f
67e3618
4ca4e8f
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
#usually it's what is on the inside that counts, not this time. This script is a mess, but at least it works.
#import required modules
from huggingface_hub import login, logout, get_token, whoami, repo_exists
import os
import sys
import subprocess
import glob
import time

#define os differences
#every shell command and path fragment used later in the script is picked here, keyed by os.name
oname = os.name
_os_settings = {
    #os.name: (clear screen, move, remove directory tree, copy, venv python, path separator)
    'nt': ('cls', 'move', 'rmdir /s /q', 'copy', 'venv\\scripts\\python.exe', '\\'),
    'posix': ('clear', 'mv', 'rm -rf', 'cp', './venv/bin/python', '/'),
}
if oname not in _os_settings:
    sys.exit('This script is not compatible with your machine.')
osclear, osmv, osrmd, oscp, pyt, slsh = _os_settings[oname]
def clear_screen():
    """Clear the terminal by running the platform's clear command (module-level osclear)."""
    clear_command = osclear
    os.system(clear_command)

#get token
#tfound is a string flag; only the exact value 'false' (a new login made during this run) is checked at the end of the script
if os.environ.get('HF_TOKEN', None) is not None:
    #a token supplied through the HF_TOKEN environment variable must be valid, otherwise stop right away
    try:
        login(get_token())
    except ValueError:
        print("You have an invalid token set in your environment variable HF_TOKEN. This will cause issues with this script\nRemove the variable or set it to a valid token.")
        sys.exit("Exiting...")
if get_token() is not None:
    tfound = 'true'
    #if the token is found in either HF_TOKEN or cli login then log in:
    try:
        login(get_token())
    except ValueError:
        #stored token was rejected, ask for a replacement
        tfound = 'false'
        try:
            login(input("API token is no longer valid. Enter your new HuggingFace token (empty to logout): "))
        except Exception: #not a bare except: Ctrl+C must abort the script instead of being treated as "logout"
            logout()
            print("Logging out... (Unable to access private or gated models)")
            tfound = 'false but logged out'
            time.sleep(3)
else:
    #if the token is not found then prompt user to provide it:
    tfound = "false"
    try:
        login(input("API token not detected. Enter your HuggingFace token (empty to skip): "))
    except Exception: #not a bare except: Ctrl+C must abort the script instead of being treated as "skip"
        print("Skipping login... (Unable to access private or gated models)")
        tfound = "false but skipped" #doesn't matter what this is, only 'false' is used
        time.sleep(3)
clear_screen()

#get original model repo url
repo_url = input("Enter unquantized model repository (User/Repo): ")

#look for repo
if not repo_exists(repo_url):
    print(f"Model repo doesn't exist at https://huggingface.co/{repo_url}")
    sys.exit("Exiting...")
#model is the folder name used under models/ (slashes become underscores)
model = repo_url.replace("/", "_")
#modelname is the bare repo name used for the output folders
#take the last segment so a repo id without a "User/" prefix doesn't raise IndexError
modelname = repo_url.split("/")[-1]
clear_screen()

#ask for number of quants
#re-prompt on anything that isn't a whole number instead of crashing with ValueError
while True:
    try:
        qmount = int(input("Enter the number of quants you want to create: "))
        break
    except ValueError:
        print("Please enter a whole number.")
#qmount is kept one higher than the requested count because it is used as the exclusive upper bound of range(1, qmount)
qmount += 1
clear_screen()

#save bpw values
print(f"Type the BPW for the following {qmount - 1} quants. Recommend staying over 2.4 BPW. Use the vram calculator to find the best BPW values: https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator")
qnum = {}
for i in range(1, qmount):
    while True: #re-prompt until the input parses as a number so a typo doesn't throw away the values already entered
        try:
            qnum[f"bpw{i}"] = float(input(f"Enter BPW for quant {i} (2.00-8.00): ")) #convert input to float for proper sorting
            break
        except ValueError:
            print("Please enter a number, for example 4.25")
clear_screen()

#collect all values in a list sorted from smallest to largest; the quants are created in this order
bpwvalue = sorted(qnum.values())

#ask to delete fp16 after done
#an empty answer counts as 'n'; anything other than a lowercase 'y' or 'n' is asked again
delmodel = input("Do you want to delete the original model? (Won't delete if paused or failed) (y/N): ") or 'n'
while delmodel not in ('y', 'n'):
    delmodel = input("Please enter 'y' or 'n': ") or 'n'
if delmodel == 'y':
    print(f"Deleting dir models/{model} after quants are finished.")
    time.sleep(3)
clear_screen()

#downloading the model
#the converted-st marker file is written by the safetensors conversion step; when it is present the download is skipped
already_converted = os.path.exists(f"models{slsh}{model}{slsh}converted-st")
if not already_converted:
    download_cmd = f"{pyt} download-model.py {repo_url}" #download model from hf (Credit to oobabooga for this script)
    result = subprocess.run(download_cmd, shell=True)
    if result.returncode != 0:
        print("Download failed.")
        sys.exit("Exiting...")
    clear_screen()

#convert to safetensors if bin
safetensors_found = glob.glob(f"models/{model}/*.safetensors") #check if safetensors model exists
if not safetensors_found:
    convertst = input("Couldn't find safetensors model, do you want to convert to safetensors? (y/n): ")
    while convertst not in ('y', 'n'):
        convertst = input("Please enter 'y' or 'n': ")
    if convertst == 'n':
        sys.exit("Can't quantize a non-safetensors model. Exiting...")
    print("Converting weights to safetensors, please wait...")
    #convert into a sibling "-st" folder first (Credit to oobabooga for this script as well)
    convert_cmd = f"{pyt} convert-to-safetensors.py models{slsh}{model} --output models{slsh}{model}-st"
    result = subprocess.run(convert_cmd, shell=True)
    if result.returncode != 0:
        print("Converting failed. Please look for a safetensors model or convert model manually.")
        sys.exit("Exiting...")
    subprocess.run(f"{osrmd} models{slsh}{model}", shell=True) #remove previous weights
    subprocess.run(f"{osmv} models{slsh}{model}-st models{slsh}{model}", shell=True) #replace with safetensors
    #leave an empty marker file so a later run knows this model was already converted
    with open(f"models{slsh}{model}{slsh}converted-st", 'w'):
        pass
    print("Finished converting")
clear_screen()

#start converting
measure_dir = f"measurements{slsh}{model}-measure" #measurement.json is kept here so later quants can reuse it
measure_file = f"{measure_dir}{slsh}measurement.json"
for bpw in bpwvalue:
    work_dir = f"{model}-exl2-{bpw}bpw-WD"
    out_dir = f"{modelname}-exl2-quants{slsh}{modelname}-exl2-{bpw}bpw"
    #checked on every pass: once a finished quant has saved a measurement, the following passes pick it up
    need_measure = not os.path.exists(measure_file)
    measure_arg = "" if need_measure else f" -m {measure_file}" #skip measurement if it exists
    print(f"Starting quantization for BPW {bpw}")
    os.makedirs(work_dir, exist_ok=True) #create working directory
    os.makedirs(out_dir, exist_ok=True) #create compile full directory
    subprocess.run(f"{oscp} models{slsh}{model}{slsh}config.json {work_dir}", shell=True) #copy config to working directory
    #more settings exist in the convert.py script, to view them go to docs/convert.md or https://github.com/turboderp/exllamav2/blob/master/doc/convert.md
    quant_cmd = f"{pyt} exllamav2/convert.py -i models/{model} -o {work_dir} -cf {out_dir} -b {bpw}{measure_arg} -hb 8"
    result = subprocess.run(quant_cmd, shell=True) #run quantization and exit if failed (Credit to turbo for his dedication to exl2)
    if result.returncode != 0:
        #exits before the cleanup below, so the working directory is left in place
        print("Quantization failed.")
        sys.exit("Exiting...")
    if need_measure:
        os.makedirs(measure_dir, exist_ok=True) #create measurement directory
        subprocess.run(f"{oscp} {work_dir}{slsh}measurement.json {measure_dir}", shell=True) #copy measurement to measure directory
        open(f"{measure_dir}/Delete folder when no more quants are needed from this model", 'w').close()
    subprocess.run(f"{osrmd} {work_dir}", shell=True) #remove working directory
    
# if chose to delete model at the beginning, delete the model
if delmodel == 'y':
    result = subprocess.run(f"{osrmd} models{slsh}{model}", shell=True)
    #only report success when the remove command actually succeeded
    if result.returncode == 0:
        print(f"Deleted models/{model}")
    else:
        print(f"Failed to delete models/{model}, please remove it manually.")

#if new sign in, tell user
#tfound is exactly 'false' only when a token entered during this run was accepted
if tfound == 'false':
    fullname = whoami().get('fullname')
    print(f'''
              You are now logged in as {fullname}.
          
          To logout, use the hf command line interface 'huggingface-cli logout'
               To view your active account, use 'huggingface-cli whoami'
          ''')

print("Finished quantizing. Exiting...")