Add checks for F16.gguf and imatrix.dat, as well as make HF model removal optional (defaulting to keeping the model).

#11
Files changed (1) hide show
  1. gguf-imat.py +44 -21
gguf-imat.py CHANGED
@@ -5,7 +5,7 @@ import subprocess
5
  import shutil
6
  from huggingface_hub import snapshot_download
7
 
8
- # Clone or update the llama.cpp repository with shallow cloning
9
  def clone_or_update_llama_cpp():
10
  print("Preparing...")
11
  base_dir = os.path.dirname(os.path.abspath(__file__))
@@ -18,7 +18,7 @@ def clone_or_update_llama_cpp():
18
  os.chdir(base_dir)
19
  print("The 'llama.cpp' repository is ready.")
20
 
21
- # Cownload and extract the latest release of llama.cpp
22
  def download_llama_release():
23
  base_dir = os.path.dirname(os.path.abspath(__file__))
24
  dl_dir = os.path.join(base_dir, "bin", "dl")
@@ -45,7 +45,7 @@ def download_llama_release():
45
  else:
46
  print("Failed to fetch the latest release information.")
47
 
48
- # Download and extract cudart if necessary
49
  def download_cudart_if_necessary(latest_release_tag):
50
  base_dir = os.path.dirname(os.path.abspath(__file__))
51
  cudart_dl_dir = os.path.join(base_dir, "bin", "dl")
@@ -71,7 +71,7 @@ def download_cudart_if_necessary(latest_release_tag):
71
  else:
72
  print("Failed to download the cudart release file.")
73
 
74
- # Collect user input and download the specified model repository
75
  def download_model_repo():
76
  base_dir = os.path.dirname(os.path.abspath(__file__))
77
  models_dir = os.path.join(base_dir, "models")
@@ -82,21 +82,31 @@ def download_model_repo():
82
  model_name = model_id.split("/")[-1]
83
  model_dir = os.path.join(models_dir, model_name)
84
 
85
- # Download the model repository if it doesn't exist
86
- if not os.path.exists(model_dir):
 
 
 
 
 
 
 
 
 
87
  revision = input("Enter the revision (branch, tag, or commit) to download (default: main): ") or "main"
88
 
 
 
 
89
  print("Downloading model repository...")
90
  snapshot_download(repo_id=model_id, local_dir=model_dir, revision=revision)
91
  print("Model repository downloaded successfully.")
92
- else:
93
- print("Model already exists.")
94
 
95
- # Convert the downloaded model to GGUF F16 format and generate imatrix.dat
96
- convert_model_to_gguf_f16(base_dir, model_dir, model_name)
97
 
98
  # Convert the downloaded model to GGUF F16 format
99
- def convert_model_to_gguf_f16(base_dir, model_dir, model_name):
100
  convert_script = os.path.join(base_dir, "llama.cpp", "convert.py")
101
  gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
102
  gguf_model_path = os.path.join(gguf_dir, f"{model_name}-F16.gguf")
@@ -104,35 +114,48 @@ def convert_model_to_gguf_f16(base_dir, model_dir, model_name):
104
  if not os.path.exists(gguf_dir):
105
  os.makedirs(gguf_dir)
106
 
107
- # Execute the conversion command if F16 file doesn't exist
108
  if not os.path.exists(gguf_model_path):
 
109
  subprocess.run(["python", convert_script, model_dir, "--outfile", gguf_model_path, "--outtype", "f16"])
110
 
111
- # Delete the original model directory
112
- shutil.rmtree(model_dir)
113
- print(f"Original model directory '{model_dir}' deleted.")
 
 
 
114
 
115
- # Execute the imatrix command if imatrix.dat doesn't exist
116
  imatrix_exe = os.path.join(base_dir, "bin", "imatrix.exe")
117
  imatrix_output = os.path.join(gguf_dir, "imatrix.dat")
118
  imatrix_txt = os.path.join(base_dir, "imatrix", "imatrix.txt")
119
  if not os.path.exists(imatrix_output):
120
- subprocess.run([imatrix_exe, "-m", gguf_model_path, "-f", imatrix_txt, "-ngl", "13"])
 
121
  # Move the imatrix.dat file to the GGUF folder
122
- shutil.move("imatrix.dat", gguf_dir)
123
  print("imatrix.dat generated successfully.")
 
 
 
 
 
124
 
125
  # Quantize the models
126
  quantize_models(base_dir, model_name)
127
 
128
- # Qantize models with different options
129
  def quantize_models(base_dir, model_name):
130
  gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
131
  f16_gguf_path = os.path.join(gguf_dir, f"{model_name}-F16.gguf")
132
 
133
  quantization_options = [
134
- "Q4_K_M", "Q4_K_S", "IQ4_NL", "IQ4_XS", "Q5_K_M",
135
- "Q5_K_S", "Q6_K", "Q8_0", "IQ3_M", "IQ3_S", "IQ3_XS", "IQ3_XXS"
 
 
 
136
  ]
137
 
138
  for quant_option in quantization_options:
 
5
  import shutil
6
  from huggingface_hub import snapshot_download
7
 
8
+ # Clone or update the llama.cpp repository with --depth 1
9
  def clone_or_update_llama_cpp():
10
  print("Preparing...")
11
  base_dir = os.path.dirname(os.path.abspath(__file__))
 
18
  os.chdir(base_dir)
19
  print("The 'llama.cpp' repository is ready.")
20
 
21
+ # Download and extract the latest release of llama.cpp Windows binaries
22
  def download_llama_release():
23
  base_dir = os.path.dirname(os.path.abspath(__file__))
24
  dl_dir = os.path.join(base_dir, "bin", "dl")
 
45
  else:
46
  print("Failed to fetch the latest release information.")
47
 
48
+ # Download and extract the Cuda .dll resources if they aren't present in the bin folder
49
  def download_cudart_if_necessary(latest_release_tag):
50
  base_dir = os.path.dirname(os.path.abspath(__file__))
51
  cudart_dl_dir = os.path.join(base_dir, "bin", "dl")
 
71
  else:
72
  print("Failed to download the cudart release file.")
73
 
74
+ # Ask for user input to download or fetch from cache the specified model repository if it doesn't exist
75
  def download_model_repo():
76
  base_dir = os.path.dirname(os.path.abspath(__file__))
77
  models_dir = os.path.join(base_dir, "models")
 
82
  model_name = model_id.split("/")[-1]
83
  model_dir = os.path.join(models_dir, model_name)
84
 
85
+ # Check if the model repository already exists
86
+ if os.path.exists(model_dir):
87
+ print("Model repository already exists. Using existing repository.")
88
+
89
+ # If the model already exists, prompt the user if they want to delete the model directory
90
+ delete_model_dir = input("Remove HF model folder after converting original model to GGUF? (yes/no) (default: no): ").strip().lower()
91
+
92
+ # Convert the existing model to GGUF F16 format and generate imatrix.dat
93
+ convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir)
94
+
95
+ else:
96
  revision = input("Enter the revision (branch, tag, or commit) to download (default: main): ") or "main"
97
 
98
+ # Ask the user if they want to remove the HF model folder after conversion
99
+ delete_model_dir = input("Remove HF model folder after converting original model to GGUF? (yes/no) (default: no): ").strip().lower()
100
+
101
  print("Downloading model repository...")
102
  snapshot_download(repo_id=model_id, local_dir=model_dir, revision=revision)
103
  print("Model repository downloaded successfully.")
 
 
104
 
105
+ # Convert the downloaded model to GGUF F16 format and generate imatrix.dat
106
+ convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir)
107
 
108
  # Convert the downloaded model to GGUF F16 format
109
+ def convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir):
110
  convert_script = os.path.join(base_dir, "llama.cpp", "convert.py")
111
  gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
112
  gguf_model_path = os.path.join(gguf_dir, f"{model_name}-F16.gguf")
 
114
  if not os.path.exists(gguf_dir):
115
  os.makedirs(gguf_dir)
116
 
117
+ # Check if F16 file already exists
118
  if not os.path.exists(gguf_model_path):
119
+ # Execute the conversion command
120
  subprocess.run(["python", convert_script, model_dir, "--outfile", gguf_model_path, "--outtype", "f16"])
121
 
122
+ # Delete the original model directory under conditions
123
+ if delete_model_dir == 'yes' or delete_model_dir == 'y':
124
+ shutil.rmtree(model_dir)
125
+ print(f"Original model directory '{model_dir}' deleted.")
126
+ else:
127
+ print(f"Original model directory '{model_dir}' was not deleted. You can remove it manually.")
128
 
129
+ # Check if imatrix.dat exists within gguf_dir
130
  imatrix_exe = os.path.join(base_dir, "bin", "imatrix.exe")
131
  imatrix_output = os.path.join(gguf_dir, "imatrix.dat")
132
  imatrix_txt = os.path.join(base_dir, "imatrix", "imatrix.txt")
133
  if not os.path.exists(imatrix_output):
134
+ # Execute the imatrix command
135
+ subprocess.run([imatrix_exe, "-m", gguf_model_path, "-f", imatrix_txt, "-ngl", "13"], cwd=gguf_dir)
136
  # Move the imatrix.dat file to the GGUF folder
137
+ shutil.move(os.path.join(gguf_dir, "imatrix.dat"), gguf_dir)
138
  print("imatrix.dat generated successfully.")
139
+ else:
140
+ print("Skipping imatrix generation as imatrix.dat already exists.")
141
+
142
+ else:
143
+ print("Skipping model conversion as F16 file already exists.")
144
 
145
  # Quantize the models
146
  quantize_models(base_dir, model_name)
147
 
148
+ # Quantize models with different options
149
  def quantize_models(base_dir, model_name):
150
  gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
151
  f16_gguf_path = os.path.join(gguf_dir, f"{model_name}-F16.gguf")
152
 
153
  quantization_options = [
154
+ "IQ3_M", "IQ3_XXS",
155
+ "Q4_K_M", "Q4_K_S", "IQ4_NL", "IQ4_XS",
156
+ "Q5_K_M", "Q5_K_S",
157
+ "Q6_K",
158
+ "Q8_0"
159
  ]
160
 
161
  for quant_option in quantization_options: