Add checks for F16.gguf and imatrix.dat, and make HF model removal optional (off by default).

---

@ABX-AI

---

- Checks for F16.gguf before initial conversion.
- Checks for imatrix.dat in the GGUF dir before generation, making it more convenient to use user-supplied imatrix data.
- Now asks the user whether they want to remove the originally downloaded HF repo, defaulting to 'no'.

Files changed (1) hide show

gguf-imat.py +44 -21

gguf-imat.py CHANGED Viewed

@@ -5,7 +5,7 @@ import subprocess
 import shutil
 from huggingface_hub import snapshot_download
-# Clone or update the llama.cpp repository with shallow cloning
 def clone_or_update_llama_cpp():
     print("Preparing...")
     base_dir = os.path.dirname(os.path.abspath(__file__))
@@ -18,7 +18,7 @@ def clone_or_update_llama_cpp():
     os.chdir(base_dir)
     print("The 'llama.cpp' repository is ready.")
-# Cownload and extract the latest release of llama.cpp
 def download_llama_release():
     base_dir = os.path.dirname(os.path.abspath(__file__))
     dl_dir = os.path.join(base_dir, "bin", "dl")
@@ -45,7 +45,7 @@ def download_llama_release():
     else:
         print("Failed to fetch the latest release information.")
-# Download and extract cudart if necessary
 def download_cudart_if_necessary(latest_release_tag):
     base_dir = os.path.dirname(os.path.abspath(__file__))
     cudart_dl_dir = os.path.join(base_dir, "bin", "dl")
@@ -71,7 +71,7 @@ def download_cudart_if_necessary(latest_release_tag):
         else:
             print("Failed to download the cudart release file.")
-# Collect user input and download the specified model repository
 def download_model_repo():
     base_dir = os.path.dirname(os.path.abspath(__file__))
     models_dir = os.path.join(base_dir, "models")
@@ -82,21 +82,31 @@ def download_model_repo():
     model_name = model_id.split("/")[-1]
     model_dir = os.path.join(models_dir, model_name)
-    # Download the model repository if it doesn't exist
-    if not os.path.exists(model_dir):
         revision = input("Enter the revision (branch, tag, or commit) to download (default: main): ") or "main"
         print("Downloading model repository...")
         snapshot_download(repo_id=model_id, local_dir=model_dir, revision=revision)
         print("Model repository downloaded successfully.")
-    else:
-        print("Model already exists.")
-    # Convert the downloaded model to GGUF F16 format and generate imatrix.dat
-    convert_model_to_gguf_f16(base_dir, model_dir, model_name)
 # Convert the downloaded model to GGUF F16 format
-def convert_model_to_gguf_f16(base_dir, model_dir, model_name):
     convert_script = os.path.join(base_dir, "llama.cpp", "convert.py")
     gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
     gguf_model_path = os.path.join(gguf_dir, f"{model_name}-F16.gguf")
@@ -104,35 +114,48 @@ def convert_model_to_gguf_f16(base_dir, model_dir, model_name):
     if not os.path.exists(gguf_dir):
         os.makedirs(gguf_dir)
-    # Execute the conversion command if F16 file doesn't exist
     if not os.path.exists(gguf_model_path):
         subprocess.run(["python", convert_script, model_dir, "--outfile", gguf_model_path, "--outtype", "f16"])
-        # Delete the original model directory
-        shutil.rmtree(model_dir)
-        print(f"Original model directory '{model_dir}' deleted.")
-        # Execute the imatrix command if imatrix.dat doesn't exist
         imatrix_exe = os.path.join(base_dir, "bin", "imatrix.exe")
         imatrix_output = os.path.join(gguf_dir, "imatrix.dat")
         imatrix_txt = os.path.join(base_dir, "imatrix", "imatrix.txt")
         if not os.path.exists(imatrix_output):
-            subprocess.run([imatrix_exe, "-m", gguf_model_path, "-f", imatrix_txt, "-ngl", "13"])
             # Move the imatrix.dat file to the GGUF folder
-            shutil.move("imatrix.dat", gguf_dir)
             print("imatrix.dat generated successfully.")
     # Quantize the models
     quantize_models(base_dir, model_name)
-# Qantize models with different options
 def quantize_models(base_dir, model_name):
     gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
     f16_gguf_path = os.path.join(gguf_dir, f"{model_name}-F16.gguf")
     quantization_options = [
-        "Q4_K_M", "Q4_K_S", "IQ4_NL", "IQ4_XS", "Q5_K_M",
-        "Q5_K_S", "Q6_K", "Q8_0", "IQ3_M", "IQ3_S", "IQ3_XS", "IQ3_XXS"
     ]
     for quant_option in quantization_options:

 import shutil
 from huggingface_hub import snapshot_download
+# Clone or update the llama.cpp repository with --depth 1
 def clone_or_update_llama_cpp():
     print("Preparing...")
     base_dir = os.path.dirname(os.path.abspath(__file__))
     os.chdir(base_dir)
     print("The 'llama.cpp' repository is ready.")
+# Download and extract the latest release of llama.cpp Windows binaries
 def download_llama_release():
     base_dir = os.path.dirname(os.path.abspath(__file__))
     dl_dir = os.path.join(base_dir, "bin", "dl")
     else:
         print("Failed to fetch the latest release information.")
+# Download and extract the Cuda .dll resources if they aren't present in the bin folder
 def download_cudart_if_necessary(latest_release_tag):
     base_dir = os.path.dirname(os.path.abspath(__file__))
     cudart_dl_dir = os.path.join(base_dir, "bin", "dl")
         else:
             print("Failed to download the cudart release file.")
+# Ask for user input to download or fetch from cache the specified model repository if it doesn't exist
 def download_model_repo():
     base_dir = os.path.dirname(os.path.abspath(__file__))
     models_dir = os.path.join(base_dir, "models")
     model_name = model_id.split("/")[-1]
     model_dir = os.path.join(models_dir, model_name)
+    # Check if the model repository already exists
+    if os.path.exists(model_dir):
+        print("Model repository already exists. Using existing repository.")
+        # If the model already exists, prompt the user if they want to delete the model directory
+        delete_model_dir = input("Remove HF model folder after converting original model to GGUF? (yes/no) (default: no): ").strip().lower()
+        # Convert the existing model to GGUF F16 format and generate imatrix.dat
+        convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir)
+    else:
         revision = input("Enter the revision (branch, tag, or commit) to download (default: main): ") or "main"
+        # Ask the user if they want to remove the HF model folder after conversion
+        delete_model_dir = input("Remove HF model folder after converting original model to GGUF? (yes/no) (default: no): ").strip().lower()
         print("Downloading model repository...")
         snapshot_download(repo_id=model_id, local_dir=model_dir, revision=revision)
         print("Model repository downloaded successfully.")
+        # Convert the downloaded model to GGUF F16 format and generate imatrix.dat
+        convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir)
 # Convert the downloaded model to GGUF F16 format
+def convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir):
     convert_script = os.path.join(base_dir, "llama.cpp", "convert.py")
     gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
     gguf_model_path = os.path.join(gguf_dir, f"{model_name}-F16.gguf")
     if not os.path.exists(gguf_dir):
         os.makedirs(gguf_dir)
+    # Check if F16 file already exists
     if not os.path.exists(gguf_model_path):
+        # Execute the conversion command
         subprocess.run(["python", convert_script, model_dir, "--outfile", gguf_model_path, "--outtype", "f16"])
+        # Delete the original model directory under conditions
+        if delete_model_dir == 'yes' or delete_model_dir == 'y':
+            shutil.rmtree(model_dir)
+            print(f"Original model directory '{model_dir}' deleted.")
+        else:
+            print(f"Original model directory '{model_dir}' was not deleted. You can remove it manually.")
+        # Check if imatrix.dat exists within gguf_dir
         imatrix_exe = os.path.join(base_dir, "bin", "imatrix.exe")
         imatrix_output = os.path.join(gguf_dir, "imatrix.dat")
         imatrix_txt = os.path.join(base_dir, "imatrix", "imatrix.txt")
         if not os.path.exists(imatrix_output):
+            # Execute the imatrix command
+            subprocess.run([imatrix_exe, "-m", gguf_model_path, "-f", imatrix_txt, "-ngl", "13"], cwd=gguf_dir)
             # Move the imatrix.dat file to the GGUF folder
+            shutil.move(os.path.join(gguf_dir, "imatrix.dat"), gguf_dir)
             print("imatrix.dat generated successfully.")
+        else:
+            print("Skipping imatrix generation as imatrix.dat already exists.")
+    else:
+        print("Skipping model conversion as F16 file already exists.")
     # Quantize the models
     quantize_models(base_dir, model_name)
+# Quantize models with different options
 def quantize_models(base_dir, model_name):
     gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
     f16_gguf_path = os.path.join(gguf_dir, f"{model_name}-F16.gguf")
     quantization_options = [
+        "IQ3_M", "IQ3_XXS",
+        "Q4_K_M", "Q4_K_S", "IQ4_NL", "IQ4_XS",
+        "Q5_K_M", "Q5_K_S",
+        "Q6_K",
+        "Q8_0"
     ]
     for quant_option in quantization_options: