Anthonyg5005 committed
Commit 67e3618 · 1 Parent(s): 587f897

added more info and updated setups

can't update zipped files on laptop
- auto-exl2-upload/INSTRUCTIONS.txt +7 -5
- auto-exl2-upload/exl2-quant.py +7 -2
- auto-exl2-upload/linux-setup.sh +8 -1
- auto-exl2-upload/windows-setup.bat +7 -0
- exl2-multi-quant-local/INSTRUCTIONS.txt +7 -5
- exl2-multi-quant-local/exl2-quant.py +8 -3
- exl2-multi-quant-local/linux-setup.sh +7 -0
- exl2-multi-quant-local/windows-setup.bat +7 -0
- ipynb/Multi_Quant_exl2.ipynb +12 -20
auto-exl2-upload/INSTRUCTIONS.txt CHANGED

@@ -19,18 +19,20 @@ For example, on Ubuntu use: sudo apt-get install build-essential
 
 This may work with AMD cards but only on linux and possibly WSL2. I can't guarantee that it will work on AMD cards, I personally don't have one to test with. You may need to install stuff before starting. https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html
 
+Only python 3.10 and 3.11
 
 
-First setup your environment by using either windows.bat or linux.sh.
+
+First setup your environment by using either windows.bat or linux.sh. If something fails during setup, then delete venv folder and try again.
 
 After setup is complete then you'll have a file called start-quant. Use this to run the quant script.
 
 Make sure that your storage space is 3x the amount of the model's size. To measure this, take the number of billion parameters and multiply by two, afterwards multiply by 3 and that's the recommended storage. There's a chance you may get away with 2.5x the size as well.
-Make sure to also have a lot of RAM depending on the model.
+Make sure to also have a lot of RAM depending on the model. I've noticed gemma use a lot.
 
-If you close the terminal or the terminal crashes, check the last BPW it was on and enter the remaining quants you wanted. It should be able to pick up where it left off. Don't type the BPW of completed quants as it will start from the beginning. You may also use ctrl + c pause at any time during the quant process.
+If you close the terminal or the terminal crashes, check the last BPW it was on and enter the remaining quants you wanted. It should be able to pick up where it left off. Don't type the BPW of completed quants as it will start from the beginning. You may also use ctrl + c to pause at any time during the quant process.
 
-To add more options to the quantization process, you can add them to line
+To add more options to the quantization process, you can add them to line 174. All options: https://github.com/turboderp/exllamav2/blob/master/doc/convert.md
 
 Things may break in the future as it downloads the latest version of all the dependencies which may either change names or how they work. If something breaks, please open a discussion at https://huggingface.co/Anthonyg5005/hf-scripts/discussions
 
@@ -44,4 +46,4 @@ https://github.com/oobabooga
 Credit to Lucain Pouget for maintaining huggingface-hub.
 https://github.com/Wauplin
 
-Only tested with CUDA 12.1 on Windows 11
+Only tested with CUDA 12.1 on Windows 11
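For context, the storage rule above is plain arithmetic: FP16 weights take about 2 bytes per parameter, so a model's on-disk size in GB is roughly twice its parameter count in billions, and the recommended free space is three times that. A minimal sketch of the estimate (the helper name is illustrative, not part of these scripts):

```python
def recommended_storage_gb(params_billions: float, headroom: float = 3.0) -> float:
    """Disk space suggested before quantizing, per the INSTRUCTIONS rule."""
    fp16_size_gb = params_billions * 2  # FP16 is ~2 bytes per parameter
    return fp16_size_gb * headroom      # 3x recommended; 2.5x may squeak by

print(recommended_storage_gb(7))       # 42.0 GB recommended for a 7B model
print(recommended_storage_gb(7, 2.5))  # 35.0 GB at the risky lower bound
```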
auto-exl2-upload/exl2-quant.py CHANGED

@@ -108,6 +108,7 @@ bpwvalue = list(qnum.values())
 #sort the list from smallest to largest
 bpwvalue.sort()
 
+#downloading the model
 if not os.path.exists(f"models{slsh}{model}{slsh}converted-st"): #check if model was converted to safetensors, skip download if it was
     result = subprocess.run(f"{pyt} download-model.py {repo_url}", shell=True) #download model from hf (Credit to oobabooga for this script)
     if result.returncode != 0:
@@ -115,6 +116,7 @@ if not os.path.exists(f"models{slsh}{model}{slsh}converted-st"): #check if model
         sys.exit("Exiting...")
     clear_screen()
 
+#convert to safetensors if bin
 if not glob.glob(f"models/{model}/*.safetensors"): #check if safetensors model exists
     convertst = input("Couldn't find safetensors model, do you want to convert to safetensors? (y/n): ")
     while convertst != 'y' and convertst != 'n':
@@ -125,8 +127,8 @@ if not glob.glob(f"models/{model}/*.safetensors"): #check if safetensors model e
         if result.returncode != 0:
             print("Converting failed. Please look for a safetensors model or convert model manually.")
             sys.exit("Exiting...")
-        subprocess.run(f"{osrmd} models{slsh}{model}", shell=True)
-        subprocess.run(f"{osmv} models{slsh}{model}-st models{slsh}{model}", shell=True)
+        subprocess.run(f"{osrmd} models{slsh}{model}", shell=True) #remove previous weights
+        subprocess.run(f"{osmv} models{slsh}{model}-st models{slsh}{model}", shell=True) #replace with safetensors
         open(f"models{slsh}{model}{slsh}converted-st", 'w').close()
         print("Finished converting")
     else:
@@ -189,6 +191,7 @@ if file_exists(f"{whoami().get('name', None)}/{modelname}-exl2", "measurement.js
     upload_file(path_or_fileobj=f"measurements{slsh}{model}-measure{slsh}measurement.json", path_in_repo="measurement.json", repo_id=f"{whoami().get('name', None)}/{modelname}-exl2", commit_message="Add measurement.json") #upload measurement.json to main
 clear_screen()
 
+#ask to delete original fp16 weights
 delmodel = input("Do you want to delete the original model? (y/n): ")
 while delmodel != 'y' and delmodel != 'n':
     delmodel = input("Please enter 'y' or 'n': ")
@@ -198,6 +201,7 @@ if delmodel == 'y':
     time.sleep(2)
     clear_screen()
 
+#ask to change repo visibility to public on hf hub
 priv2pub = input("Do you want to make the repo public? (y/n): ")
 while priv2pub != 'y' and priv2pub != 'n':
     priv2pub = input("Please enter 'y' or 'n': ")
@@ -207,6 +211,7 @@ if priv2pub == 'y':
     time.sleep(2)
     clear_screen()
 
+#if new sign in, tell user
 if tfound == 'false':
     print(f'''
 You are now logged in as {whoami().get('fullname', None)}.
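For context, the flow these hunks annotate is: skip the download when a converted-st marker file exists, otherwise download the repo, convert any .bin weights to safetensors, swap the old folder for the converted one, and touch the marker so a rerun can resume. A standalone sketch of that pattern, with placeholder paths and shutil standing in for the script's {osrmd}/{osmv} shell commands:

```python
import glob
import os
import shutil
import subprocess
import sys

model_dir = "models/my-model"                     # placeholder path
marker = os.path.join(model_dir, "converted-st")  # marker = already converted

if not os.path.exists(marker):  # skip download if a previous run finished converting
    if subprocess.run("python download-model.py user/my-model", shell=True).returncode != 0:
        sys.exit("Download failed. Exiting...")

if not glob.glob(f"{model_dir}/*.safetensors"):   # only .bin weights present
    result = subprocess.run(
        f"python convert-to-safetensors.py {model_dir} --output {model_dir}-st", shell=True)
    if result.returncode != 0:
        sys.exit("Converting failed. Exiting...")
    shutil.rmtree(model_dir)                      # remove previous weights
    shutil.move(f"{model_dir}-st", model_dir)     # replace with safetensors
    open(marker, "w").close()                     # touch marker so reruns skip all this
```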
auto-exl2-upload/linux-setup.sh CHANGED

@@ -6,7 +6,7 @@
 if [ ! -d "venv" ]; then
     python3 -m venv venv
 else
-    echo "venv directory already exists. If something is broken, delete
+    echo "venv directory already exists. If something is broken, delete venv folder and run this script again."
     read -p "Press enter to continue"
     exit
 fi
@@ -40,6 +40,13 @@ else
     exit
 fi
 
+echo "Deleting potential conflicting files"
+rm convert-to-safetensors.py
+rm download-model.py
+rm -rf exllamav2
+rm start-quant.sh
+rm enter-venv.sh
+
 # download stuff
 echo "Downloading files"
 git clone https://github.com/turboderp/exllamav2
auto-exl2-upload/windows-setup.bat CHANGED

@@ -41,6 +41,13 @@ if "%cuda_version%"=="11" (
     exit
 )
 
+echo Deleting potential conflicting files
+del convert-to-safetensors.py
+del download-model.py
+rmdir /s /q exllamav2
+del start-quant.sh
+del enter-venv.sh
+
 REM download stuff
 echo Downloading files...
 git clone https://github.com/turboderp/exllamav2
|
exl2-multi-quant-local/INSTRUCTIONS.txt
CHANGED
@@ -19,18 +19,20 @@ For example, on Ubuntu use: sudo apt-get install build-essential
|
|
19 |
|
20 |
This may work with AMD cards but only on linux and possibly WSL2. I can't guarantee that it will work on AMD cards, I personally don't have one to test with. You may need to install stuff before starting. https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html
|
21 |
|
|
|
22 |
|
23 |
|
24 |
-
|
|
|
25 |
|
26 |
After setup is complete then you'll have a file called start-quant. Use this to run the quant script.
|
27 |
|
28 |
Make sure that your storage space is 3x the amount of the model's size plus 1 more time per quant. To mesure this, take the number of billion parameters and mutliply by two, afterwards mutliply by 3 and that's the recommended storage. There's a chance you may get away with 2.5x the size as well.
|
29 |
-
Make sure to also have a lot of RAM depending on the model.
|
30 |
|
31 |
-
If you close the terminal or the terminal crashes, check the last BPW it was on and enter the remaining quants you wanted. It should be able to pick up where it left off. Don't type the BPW of completed quants as it will start from the beginning. You may also use ctrl + c pause at any time during the quant process.
|
32 |
|
33 |
-
To add more options to the quantization process, you can add them to line
|
34 |
|
35 |
Things may break in the future as it downloads the latest version of all the dependencies which may either change names or how they work. If something breaks, please open a discussion at https://huggingface.co/Anthonyg5005/hf-scripts/discussions
|
36 |
|
@@ -44,4 +46,4 @@ https://github.com/oobabooga
|
|
44 |
Credit to Lucain Pouget for maintaining huggingface-hub.
|
45 |
https://github.com/Wauplin
|
46 |
|
47 |
-
Only tested with CUDA 12.1 on Windows 11
|
|
|
19 |
|
20 |
This may work with AMD cards but only on linux and possibly WSL2. I can't guarantee that it will work on AMD cards, I personally don't have one to test with. You may need to install stuff before starting. https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html
|
21 |
|
22 |
+
Only python 3.10 and 3.11
|
23 |
|
24 |
|
25 |
+
|
26 |
+
First setup your environment by using either windows.bat or linux.sh. If something fails during setup, then delete venv folder and try again.
|
27 |
|
28 |
After setup is complete then you'll have a file called start-quant. Use this to run the quant script.
|
29 |
|
30 |
Make sure that your storage space is 3x the amount of the model's size plus 1 more time per quant. To mesure this, take the number of billion parameters and mutliply by two, afterwards mutliply by 3 and that's the recommended storage. There's a chance you may get away with 2.5x the size as well.
|
31 |
+
Make sure to also have a lot of RAM depending on the model. Have noticed gemma to use a lot.
|
32 |
|
33 |
+
If you close the terminal or the terminal crashes, check the last BPW it was on and enter the remaining quants you wanted. It should be able to pick up where it left off. Don't type the BPW of completed quants as it will start from the beginning. You may also use ctrl + c to pause at any time during the quant process.
|
34 |
|
35 |
+
To add more options to the quantization process, you can add them to line 136. All options: https://github.com/turboderp/exllamav2/blob/master/doc/convert.md
|
36 |
|
37 |
Things may break in the future as it downloads the latest version of all the dependencies which may either change names or how they work. If something breaks, please open a discussion at https://huggingface.co/Anthonyg5005/hf-scripts/discussions
|
38 |
|
|
|
46 |
Credit to Lucain Pouget for maintaining huggingface-hub.
|
47 |
https://github.com/Wauplin
|
48 |
|
49 |
+
Only tested with CUDA 12.1 on Windows 11
|
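On "add them to line 136" (line 174 in the auto-upload script): that line builds the exllamav2 convert.py command, and extra switches from the linked convert.md can be appended to it. A hedged sketch of what that might look like; the exact flags (e.g. -hb for head bits) are assumptions here, so check convert.md before relying on them:

```python
# illustrative only: extra convert.py options appended to the quant command
bpw = 4.0
extra_options = "-hb 8"  # assumed example flag from convert.md; verify before use
cmd = (f"python exllamav2/convert.py -i models/my-model "
       f"-o my-model-exl2-{bpw}bpw-WD -b {bpw} {extra_options}")
print(cmd)  # the scripts run the equivalent via subprocess.run(cmd, shell=True)
```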
exl2-multi-quant-local/exl2-quant.py CHANGED

@@ -51,7 +51,7 @@ else:
         login(input("API token not detected. Enter your HuggingFace token (empty to skip): "))
     except:
         print("Skipping login... (Unable to access private or gated models)")
-        tfound = "false but skipped" #doesn't matter what this is, only false is used
+        tfound = "false but skipped" #doesn't matter what this is, only 'false' is used
     time.sleep(3)
     clear_screen()
 
@@ -71,6 +71,7 @@ qmount = int(input("Enter the number of quants you want to create: "))
 qmount += 1
 clear_screen()
 
+#ask to delete fp16 after done
 delmodel = input("Do you want to delete the original model after finishing? (Won't delete if canceled or failed) (y/n): ")
 while delmodel != 'y' and delmodel != 'n':
     delmodel = input("Please enter 'y' or 'n': ")
@@ -92,6 +93,7 @@ bpwvalue = list(qnum.values())
 #sort the list from smallest to largest
 bpwvalue.sort()
 
+#downloading the model
 if not os.path.exists(f"models{slsh}{model}{slsh}converted-st"): #check if model was converted to safetensors, skip download if it was
     result = subprocess.run(f"{pyt} download-model.py {repo_url}", shell=True) #download model from hf (Credit to oobabooga for this script)
     if result.returncode != 0:
@@ -99,6 +101,7 @@ if not os.path.exists(f"models{slsh}{model}{slsh}converted-st"): #check if model
         sys.exit("Exiting...")
     clear_screen()
 
+#convert to safetensors if bin
 if not glob.glob(f"models/{model}/*.safetensors"): #check if safetensors model exists
     convertst = input("Couldn't find safetensors model, do you want to convert to safetensors? (y/n): ")
     while convertst != 'y' and convertst != 'n':
@@ -109,8 +112,8 @@ if not glob.glob(f"models/{model}/*.safetensors"): #check if safetensors model e
         if result.returncode != 0:
             print("Converting failed. Please look for a safetensors model or convert model manually.")
             sys.exit("Exiting...")
-        subprocess.run(f"{osrmd} models{slsh}{model}", shell=True)
-        subprocess.run(f"{osmv} models{slsh}{model}-st models{slsh}{model}", shell=True)
+        subprocess.run(f"{osrmd} models{slsh}{model}", shell=True) #remove previous weights
+        subprocess.run(f"{osmv} models{slsh}{model}-st models{slsh}{model}", shell=True) #replace with safetensors
         open(f"models{slsh}{model}{slsh}converted-st", 'w').close()
         print("Finished converting")
     else:
@@ -140,12 +143,14 @@ for bpw in bpwvalue:
     open(f"measurements{slsh}{model}-measure/Delete folder when no more quants are needed from this model", 'w').close()
     subprocess.run(f"{osrmd} {model}-exl2-{bpw}bpw-WD", shell=True) #remove working directory
 
+# if chose to delete model at the beginning, delete the model
 if delmodel == 'y':
     subprocess.run(f"{osrmd} models{slsh}{model}", shell=True)
     print(f"Deleted models/{model}")
     time.sleep(2)
     clear_screen()
 
+#if new sign in, tell user
 if tfound == 'false':
     print(f'''
 You are now logged in as {whoami().get('fullname', None)}.
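Both quant scripts repeat the same validation loop for every y/n question (delmodel, priv2pub, convertst). A small helper would collapse the repetition; this is a hypothetical refactor, not something in the commit:

```python
def ask_yes_no(prompt: str) -> bool:
    """Re-prompt until the user types exactly 'y' or 'n'."""
    answer = input(prompt)
    while answer not in ('y', 'n'):
        answer = input("Please enter 'y' or 'n': ")
    return answer == 'y'

# usage mirroring the script's prompt:
# delete_model = ask_yes_no("Do you want to delete the original model after finishing? (y/n): ")
```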
exl2-multi-quant-local/linux-setup.sh CHANGED

@@ -40,6 +40,13 @@ else
     exit
 fi
 
+echo "Deleting potential conflicting files"
+rm convert-to-safetensors.py
+rm download-model.py
+rm -rf exllamav2
+rm start-quant.sh
+rm enter-venv.sh
+
 # download stuff
 echo "Downloading files"
 git clone https://github.com/turboderp/exllamav2
exl2-multi-quant-local/windows-setup.bat CHANGED

@@ -41,6 +41,13 @@ if "%cuda_version%"=="11" (
     exit
 )
 
+echo Deleting potential conflicting files
+del convert-to-safetensors.py
+del download-model.py
+rmdir /s /q exllamav2
+del start-quant.sh
+del enter-venv.sh
+
 REM download stuff
 echo Downloading files...
 git clone https://github.com/turboderp/exllamav2
ipynb/Multi_Quant_exl2.ipynb CHANGED

@@ -63,7 +63,6 @@
     " You cannot log in.\n",
     " Either set the environment variable to a 'WRITE' token or remove it.\n",
     " ''')\n",
-    " input(\"Press enter to continue.\")\n",
     " sys.exit(\"Exiting...\")\n",
     " if os.environ.get('COLAB_BACKEND_VERSION', None) is not None:\n",
     " print('''\n",
@@ -95,7 +94,7 @@
     "#@title Start quant\n",
     "#@markdown ### Using subprocess to execute scripts doesn't output on Colab. If something seems frozen, please wait. Any detected errors will automatically stop Colab\n",
     "#import required modules\n",
-    "from huggingface_hub import
+    "from huggingface_hub import repo_exists, upload_folder, create_repo, upload_file, create_branch\n",
     "import os\n",
     "import sys\n",
     "import subprocess\n",
@@ -158,24 +157,17 @@
     " sys.exit(\"Exiting...\")\n",
     " print(\"Download finished\\n\\n\")\n",
     "\n",
-    "
-    "
-    "
-    "
-    "
-    "
-    "
-    "
-    "
-    "
-    "
-    " subprocess.run(f\"{osrmd} models{slsh}{model}\", shell=True)\n",
-    " subprocess.run(f\"{osmv} models{slsh}{model}-st models{slsh}{model}\", shell=True)\n",
-    " open(f\"models{slsh}{model}{slsh}converted-st\", 'w').close()\n",
-    " print(\"Finished converting\")\n",
-    " print(\"\\n\\n\")\n",
-    " else:\n",
-    " sys.exit(\"Can't quantize a non-safetensors model. Exiting...\")\n",
+    "if not glob.glob(f\"models/{model}/*.safetensors\"): #check if safetensors model exists, if not try converting\n",
+    " print(\"Converting weights to safetensors, please wait...\")\n",
+    " result = subprocess.run(f\"{pyt} convert-to-safetensors.py models{slsh}{model} --output models{slsh}{model}-st\", shell=True) #convert to safetensors (Credit to oobabooga for this script as well)\n",
+    " if result.returncode != 0:\n",
+    "  print(\"Converting failed. Please look for a safetensors/bin model.\")\n",
+    "  sys.exit(\"Exiting...\")\n",
+    " subprocess.run(f\"{osrmd} models{slsh}{model}\", shell=True)\n",
+    " subprocess.run(f\"{osmv} models{slsh}{model}-st models{slsh}{model}\", shell=True)\n",
+    " open(f\"models{slsh}{model}{slsh}converted-st\", 'w').close()\n",
+    " print(\"Finished converting\")\n",
+    " print(\"\\n\\n\")\n",
     "\n",
     "#create new repo if one doesn't already exist\n",
     "if repo_exists(f\"{whoami().get('name', None)}/{modelname}-exl2\") == False:\n",

(Several removed notebook lines in the last hunk did not survive extraction; they are kept as bare "-" string entries above.)
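The imports restored in the notebook (repo_exists, create_repo, create_branch, upload_file, upload_folder) are the Hub-side half of the workflow: each finished quant is pushed to a branch of a {username}/{modelname}-exl2 repo. A rough sketch of that flow with a placeholder model name, assuming the standard huggingface_hub calls rather than the notebook's exact code:

```python
from huggingface_hub import create_branch, create_repo, upload_folder, whoami

modelname = "my-model"  # placeholder
repo = f"{whoami().get('name', None)}/{modelname}-exl2"
create_repo(repo, private=True, exist_ok=True)          # one repo for all quants

bpw = 4.0
create_branch(repo, branch=f"{bpw}bpw", exist_ok=True)  # one branch per BPW
upload_folder(repo_id=repo, folder_path=f"{modelname}-exl2-{bpw}bpw",
              revision=f"{bpw}bpw", commit_message=f"Add {bpw}bpw quant")
```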