Lewdiculous
committed on
Commit
•
de5a40c
1
Parent(s):
b6bbbe0
Fix things.
Browse files- gguf-imat.py +21 -15
gguf-imat.py
CHANGED
@@ -89,8 +89,11 @@ def download_model_repo():
|
|
89 |
# If the model already exists, prompt the user if they want to delete the model directory
|
90 |
delete_model_dir = input("Remove HF model folder after converting original model to GGUF? (yes/no) (default: no): ").strip().lower()
|
91 |
|
|
|
|
|
|
|
92 |
# Convert the existing model to GGUF F16 format and generate imatrix.dat
|
93 |
-
convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir)
|
94 |
|
95 |
else:
|
96 |
revision = input("Enter the revision (branch, tag, or commit) to download (default: main): ") or "main"
|
@@ -102,11 +105,14 @@ def download_model_repo():
|
|
102 |
snapshot_download(repo_id=model_id, local_dir=model_dir, revision=revision)
|
103 |
print("Model repository downloaded successfully.")
|
104 |
|
|
|
|
|
|
|
105 |
# Convert the downloaded model to GGUF F16 format and generate imatrix.dat
|
106 |
-
convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir)
|
107 |
|
108 |
# Convert the downloaded model to GGUF F16 format
|
109 |
-
def convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir):
|
110 |
convert_script = os.path.join(base_dir, "llama.cpp", "convert.py")
|
111 |
gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
|
112 |
gguf_model_path = os.path.join(gguf_dir, f"{model_name}-F16.gguf")
|
@@ -126,21 +132,21 @@ def convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir)
|
|
126 |
else:
|
127 |
print(f"Original model directory '{model_dir}' was not deleted. You can remove it manually.")
|
128 |
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
|
|
137 |
shutil.move(os.path.join(gguf_dir, "imatrix.dat"), gguf_dir)
|
138 |
print("imatrix.dat generated successfully.")
|
139 |
else:
|
140 |
-
print("
|
141 |
-
|
142 |
else:
|
143 |
-
print("Skipping
|
144 |
|
145 |
# Quantize the models
|
146 |
quantize_models(base_dir, model_name)
|
@@ -177,4 +183,4 @@ def main():
|
|
177 |
print("Finished preparing resources.")
|
178 |
|
179 |
if __name__ == "__main__":
|
180 |
-
main()
|
|
|
89 |
# If the model already exists, prompt the user if they want to delete the model directory
|
90 |
delete_model_dir = input("Remove HF model folder after converting original model to GGUF? (yes/no) (default: no): ").strip().lower()
|
91 |
|
92 |
+
# Ask for the name of the imatrix.txt file
|
93 |
+
imatrix_file_name = input("Enter the name of the imatrix.txt file (default: imatrix.txt): ").strip() or "imatrix.txt"
|
94 |
+
|
95 |
# Convert the existing model to GGUF F16 format and generate imatrix.dat
|
96 |
+
convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name)
|
97 |
|
98 |
else:
|
99 |
revision = input("Enter the revision (branch, tag, or commit) to download (default: main): ") or "main"
|
|
|
105 |
snapshot_download(repo_id=model_id, local_dir=model_dir, revision=revision)
|
106 |
print("Model repository downloaded successfully.")
|
107 |
|
108 |
+
# Ask for the name of the imatrix.txt file
|
109 |
+
imatrix_file_name = input("Enter the name of the imatrix.txt file (default: imatrix.txt): ").strip() or "imatrix.txt"
|
110 |
+
|
111 |
# Convert the downloaded model to GGUF F16 format and generate imatrix.dat
|
112 |
+
convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name)
|
113 |
|
114 |
# Convert the downloaded model to GGUF F16 format
|
115 |
+
def convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name):
|
116 |
convert_script = os.path.join(base_dir, "llama.cpp", "convert.py")
|
117 |
gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
|
118 |
gguf_model_path = os.path.join(gguf_dir, f"{model_name}-F16.gguf")
|
|
|
132 |
else:
|
133 |
print(f"Original model directory '{model_dir}' was not deleted. You can remove it manually.")
|
134 |
|
135 |
+
# Generate imatrix.dat if it doesn't exist
|
136 |
+
imatrix_exe = os.path.join(base_dir, "bin", "imatrix.exe")
|
137 |
+
imatrix_output = os.path.join(gguf_dir, "imatrix.dat")
|
138 |
+
imatrix_txt = os.path.join(base_dir, "imatrix", imatrix_file_name)
|
139 |
+
if not os.path.exists(imatrix_output):
|
140 |
+
# Execute the imatrix command
|
141 |
+
subprocess.run([imatrix_exe, "-m", gguf_model_path, "-f", imatrix_txt, "-ngl", "13"], cwd=gguf_dir)
|
142 |
+
# Move the imatrix.dat file to the GGUF folder
|
143 |
+
if os.path.exists(os.path.join(gguf_dir, "imatrix.dat")):
|
144 |
shutil.move(os.path.join(gguf_dir, "imatrix.dat"), gguf_dir)
|
145 |
print("imatrix.dat generated successfully.")
|
146 |
else:
|
147 |
+
print("Failed to generate imatrix.dat file.")
|
|
|
148 |
else:
|
149 |
+
print("Skipping imatrix generation as imatrix.dat already exists.")
|
150 |
|
151 |
# Quantize the models
|
152 |
quantize_models(base_dir, model_name)
|
|
|
183 |
print("Finished preparing resources.")
|
184 |
|
185 |
if __name__ == "__main__":
|
186 |
+
main()
|