AetherArchitectural
/

GGUF-Quantization-Script

Text Generation

GGUF

quantized

text-generation-inference

Model card Files Files and versions Community

Lewdiculous committed on Apr 19, 2024

Commit

be9c0af

•

1 Parent(s): b56ecf9

More fixes.

Browse files

Files changed (1) hide show

gguf-imat-llama-3.py +10 -32

gguf-imat-llama-3.py CHANGED Viewed

@@ -5,7 +5,6 @@ import subprocess
 import shutil
 from huggingface_hub import snapshot_download
-# Clone or update the llama.cpp repository with --depth 1
 def clone_or_update_llama_cpp():
     print("Preparing...")
     base_dir = os.path.dirname(os.path.abspath(__file__))
@@ -18,7 +17,6 @@ def clone_or_update_llama_cpp():
     os.chdir(base_dir)
     print("The 'llama.cpp' repository is ready.")
-# Download and extract the latest release of llama.cpp Windows binaries
 def download_llama_release():
     base_dir = os.path.dirname(os.path.abspath(__file__))
     dl_dir = os.path.join(base_dir, "bin", "dl")
@@ -45,7 +43,6 @@ def download_llama_release():
     else:
         print("Failed to fetch the latest release information.")
-# Download and extract the Cuda .dll resources if they aren't present in the bin folder
 def download_cudart_if_necessary(latest_release_tag):
     base_dir = os.path.dirname(os.path.abspath(__file__))
     cudart_dl_dir = os.path.join(base_dir, "bin", "dl")
@@ -55,7 +52,6 @@ def download_cudart_if_necessary(latest_release_tag):
     cudart_zip_file = os.path.join(cudart_dl_dir, "cudart-llama-bin-win-cu12.2.0-x64.zip")
     cudart_extracted_files = ["cublas64_12.dll", "cublasLt64_12.dll", "cudart64_12.dll"]
-    # Check if all required files exist
     if all(os.path.exists(os.path.join(base_dir, "bin", file)) for file in cudart_extracted_files):
         print("Cuda resources already exist. Skipping download.")
     else:
@@ -71,7 +67,6 @@ def download_cudart_if_necessary(latest_release_tag):
         else:
             print("Failed to download the cudart release file.")
-# Ask for user input to download or fetch from cache the specified model repository if it doesn't exist
 def download_model_repo():
     base_dir = os.path.dirname(os.path.abspath(__file__))
     models_dir = os.path.join(base_dir, "models")
@@ -82,36 +77,28 @@ def download_model_repo():
     model_name = model_id.split("/")[-1]
     model_dir = os.path.join(models_dir, model_name)
-    # Check if the model repository already exists
     if os.path.exists(model_dir):
         print("Model repository already exists. Using existing repository.")
-        # If the model already exists, prompt the user if they want to delete the model directory
         delete_model_dir = input("Remove HF model folder after converting original model to GGUF? (yes/no) (default: no): ").strip().lower()
-        # Ask for the name of the imatrix.txt file
         imatrix_file_name = input("Enter the name of the imatrix.txt file (default: imatrix.txt): ").strip() or "imatrix.txt"
-        # Convert the existing model to GGUF F16 format and generate imatrix.dat
         convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name)
     else:
         revision = input("Enter the revision (branch, tag, or commit) to download (default: main): ") or "main"
-        # Ask the user if they want to remove the HF model folder after conversion
         delete_model_dir = input("Remove HF model folder after converting original model to GGUF? (yes/no) (default: no): ").strip().lower()
         print("Downloading model repository...")
         snapshot_download(repo_id=model_id, local_dir=model_dir, revision=revision)
         print("Model repository downloaded successfully.")
-        # Ask for the name of the imatrix.txt file
         imatrix_file_name = input("Enter the name of the imatrix.txt file (default: imatrix.txt): ").strip() or "imatrix.txt"
-        # Convert the downloaded model to GGUF F16 format and generate imatrix.dat
         convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name)
-# Convert the downloaded model to GGUF F16 format
 def convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name):
     convert_script = os.path.join(base_dir, "llama.cpp", "convert.py")
     gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
@@ -120,38 +107,30 @@ def convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir,
     if not os.path.exists(gguf_dir):
         os.makedirs(gguf_dir)
-    # Check if F16 file already exists
     if not os.path.exists(gguf_model_path):
-        # Execute the conversion command
         subprocess.run(["python", convert_script, model_dir, "--outfile", gguf_model_path, "--outtype", "f16", "--vocab-type", "bpe"])
-        # Delete the original model directory under conditions
         if delete_model_dir == 'yes' or delete_model_dir == 'y':
             shutil.rmtree(model_dir)
             print(f"Original model directory '{model_dir}' deleted.")
         else:
             print(f"Original model directory '{model_dir}' was not deleted. You can remove it manually.")
-    # Generate imatrix.dat if it doesn't exist
     imatrix_exe = os.path.join(base_dir, "bin", "imatrix.exe")
-    imatrix_output = os.path.join(gguf_dir, "imatrix.dat")
-    imatrix_txt = os.path.join(base_dir, "imatrix", imatrix_file_name)
-    if not os.path.exists(imatrix_output):
-        # Execute the imatrix command
-        subprocess.run([imatrix_exe, "-m", gguf_model_path, "-f", imatrix_txt, "-ngl", "8"], cwd=gguf_dir)
-        # Move the imatrix.dat file to the GGUF folder
-        if os.path.exists(os.path.join(gguf_dir, "imatrix.dat")):
-            shutil.move(os.path.join(gguf_dir, "imatrix.dat"), gguf_dir)
-            print("imatrix.dat generated successfully.")
-        else:
-            print("Failed to generate imatrix.dat file.")
     else:
-        print("Skipping imatrix generation as imatrix.dat already exists.")
-    # Quantize the models
     quantize_models(base_dir, model_name)
-# Quantize models with different options
 def quantize_models(base_dir, model_name):
     gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
     f16_gguf_path = os.path.join(gguf_dir, f"{model_name}-F16.gguf")
@@ -174,7 +153,6 @@ def quantize_models(base_dir, model_name):
                         f16_gguf_path, quantized_gguf_path, quant_option], cwd=gguf_dir)
         print(f"Model quantized with {quant_option} option.")
-# Main function - Steps
 def main():
     clone_or_update_llama_cpp()
     latest_release_tag = download_llama_release()

 import shutil
 from huggingface_hub import snapshot_download
 def clone_or_update_llama_cpp():
     print("Preparing...")
     base_dir = os.path.dirname(os.path.abspath(__file__))
     os.chdir(base_dir)
     print("The 'llama.cpp' repository is ready.")
 def download_llama_release():
     base_dir = os.path.dirname(os.path.abspath(__file__))
     dl_dir = os.path.join(base_dir, "bin", "dl")
     else:
         print("Failed to fetch the latest release information.")
 def download_cudart_if_necessary(latest_release_tag):
     base_dir = os.path.dirname(os.path.abspath(__file__))
     cudart_dl_dir = os.path.join(base_dir, "bin", "dl")
     cudart_zip_file = os.path.join(cudart_dl_dir, "cudart-llama-bin-win-cu12.2.0-x64.zip")
     cudart_extracted_files = ["cublas64_12.dll", "cublasLt64_12.dll", "cudart64_12.dll"]
     if all(os.path.exists(os.path.join(base_dir, "bin", file)) for file in cudart_extracted_files):
         print("Cuda resources already exist. Skipping download.")
     else:
         else:
             print("Failed to download the cudart release file.")
 def download_model_repo():
     base_dir = os.path.dirname(os.path.abspath(__file__))
     models_dir = os.path.join(base_dir, "models")
     model_name = model_id.split("/")[-1]
     model_dir = os.path.join(models_dir, model_name)
     if os.path.exists(model_dir):
         print("Model repository already exists. Using existing repository.")
         delete_model_dir = input("Remove HF model folder after converting original model to GGUF? (yes/no) (default: no): ").strip().lower()
         imatrix_file_name = input("Enter the name of the imatrix.txt file (default: imatrix.txt): ").strip() or "imatrix.txt"
         convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name)
     else:
         revision = input("Enter the revision (branch, tag, or commit) to download (default: main): ") or "main"
         delete_model_dir = input("Remove HF model folder after converting original model to GGUF? (yes/no) (default: no): ").strip().lower()
         print("Downloading model repository...")
         snapshot_download(repo_id=model_id, local_dir=model_dir, revision=revision)
         print("Model repository downloaded successfully.")
         imatrix_file_name = input("Enter the name of the imatrix.txt file (default: imatrix.txt): ").strip() or "imatrix.txt"
         convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name)
 def convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name):
     convert_script = os.path.join(base_dir, "llama.cpp", "convert.py")
     gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
     if not os.path.exists(gguf_dir):
         os.makedirs(gguf_dir)
     if not os.path.exists(gguf_model_path):
         subprocess.run(["python", convert_script, model_dir, "--outfile", gguf_model_path, "--outtype", "f16", "--vocab-type", "bpe"])
         if delete_model_dir == 'yes' or delete_model_dir == 'y':
             shutil.rmtree(model_dir)
             print(f"Original model directory '{model_dir}' deleted.")
         else:
             print(f"Original model directory '{model_dir}' was not deleted. You can remove it manually.")
     imatrix_exe = os.path.join(base_dir, "bin", "imatrix.exe")
+    imatrix_output_src = os.path.join(gguf_dir, "imatrix.dat")
+    imatrix_output_dst = os.path.join(gguf_dir, "imatrix.dat")
+    if not os.path.exists(imatrix_output_dst):
+        try:
+            subprocess.run([imatrix_exe, "-m", gguf_model_path, "-f", os.path.join(base_dir, "imatrix", imatrix_file_name), "-ngl", "8"], cwd=gguf_dir)
+            shutil.move(imatrix_output_src, imatrix_output_dst)
+            print("imatrix.dat moved successfully.")
+        except Exception as e:
+            print("Error occurred while moving imatrix.dat:", e)
     else:
+        print("imatrix.dat already exists in the GGUF folder.")
     quantize_models(base_dir, model_name)
 def quantize_models(base_dir, model_name):
     gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
     f16_gguf_path = os.path.join(gguf_dir, f"{model_name}-F16.gguf")
                         f16_gguf_path, quantized_gguf_path, quant_option], cwd=gguf_dir)
         print(f"Model quantized with {quant_option} option.")
 def main():
     clone_or_update_llama_cpp()
     latest_release_tag = download_llama_release()