SixOpen committed
Commit 4da3d6d
1 Parent(s): ca38009

Update app.py

Files changed (1): app.py (+12 -28)
app.py CHANGED
@@ -29,7 +29,7 @@ def script_to_use(model_id, api):
     return "convert.py" if arch in LLAMA_LIKE_ARCHS else "convert-hf-to-gguf.py"
 
 def generate_importance_matrix(model_path, train_data_path):
-    imatrix_command = f"./imatrix -m ../{model_path} -f {train_data_path} -ngl 0" #No GPU on the basic spaces unlike main, it works regardless but takes >2 hours
+    imatrix_command = f"./imatrix -m ../{model_path} -f {train_data_path} -ngl 0" #No GPU on the basic spaces unlike main, it works regardless but takes >2 hours
 
     os.chdir("llama.cpp")
 
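For context on this hunk: imatrix is llama.cpp's importance-matrix tool, and -ngl 0 disables GPU offload, as the inline comment notes. The rest of the function sits outside the hunk; a minimal sketch of how it presumably shells out, where the subprocess handling and the cwd restore are our assumptions rather than code from this commit:

    import os
    import subprocess

    def generate_importance_matrix(model_path, train_data_path):
        # -ngl 0 keeps every layer on the CPU: basic Spaces have no GPU,
        # so the run works but can take >2 hours.
        imatrix_command = f"./imatrix -m ../{model_path} -f {train_data_path} -ngl 0"

        os.chdir("llama.cpp")
        try:
            # Hypothetical continuation: shell out the same way the quantize
            # step below does, and fail loudly on a non-zero exit code.
            result = subprocess.run(imatrix_command, shell=True,
                                    capture_output=True, text=True)
            if result.returncode != 0:
                raise Exception(f"Error generating importance matrix: {result.stderr}")
        finally:
            os.chdir("..")  # restore the working directory for later steps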
@@ -134,32 +134,19 @@ def process_model(model_id, q_method, private_repo, train_data_file, split_model
     imatrix_path = "llama.cpp/imatrix.dat"
     use_imatrix = q_method.startswith("IQ")
 
-    if use_imatrix:
-        if train_data_file:
-
-            train_data_path = train_data_file.name
-
-
-            print(f"Training data file path: {train_data_path}")
-
-
-            if not os.path.isfile(train_data_path):
-                raise Exception(f"Training data file not found: {train_data_path}")
-        else:
-            # for now it's a decent fallback/default
-            train_data_path = "imatrix_calibration.txt"
-
-
-            print(f"Using fallback training data file: {train_data_path}")
-
-
-            if not os.path.isfile(train_data_path):
-                raise Exception(f"Fallback training data file not found: {train_data_path}")
+    if train_data_file and use_imatrix:
+
+        train_data_path = train_data_file.name
+
+
+        print(f"Training data file path: {train_data_path}")
+
+        if not os.path.isfile(train_data_path):
+            raise Exception(f"Training data file not found: {train_data_path}")
 
         generate_importance_matrix(fp16, train_data_path)
     else:
-        print("Not using imatrix quantization. Skipping importance matrix generation.")
-
+        print("No training data file provided or not using imatrix quantization.")
 
     username = whoami(oauth_token.token)["name"]
     quantized_gguf_name = f"{model_name.lower()}-{q_method.lower()}-imat.gguf"
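Net effect of this hunk: the nested "if use_imatrix: / if train_data_file:" checks collapse into a single guard, and the imatrix_calibration.txt fallback is removed, so IQ-type quantization now only builds an importance matrix when the user actually uploads calibration data. A condensed view of the new control flow (the wrapper function is purely illustrative, not part of app.py):

    import os

    def resolve_train_data_path(q_method, train_data_file):
        use_imatrix = q_method.startswith("IQ")  # IQ* methods need an importance matrix
        if train_data_file and use_imatrix:
            train_data_path = train_data_file.name  # uploaded file objects expose a .name path
            if not os.path.isfile(train_data_path):
                raise Exception(f"Training data file not found: {train_data_path}")
            return train_data_path
        # The old imatrix_calibration.txt fallback is gone: with no upload,
        # importance-matrix generation is skipped entirely.
        return None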
@@ -169,12 +156,10 @@ def process_model(model_id, q_method, private_repo, train_data_file, split_model
     else:
         quantise_ggml = f"./llama.cpp/quantize {fp16} {quantized_gguf_path} {q_method}"
 
-
     print(f"Quantization command: {quantise_ggml}")
 
     result = subprocess.run(quantise_ggml, shell=True, capture_output=True, text=True)
 
-
     print(f"Quantization command stdout: {result.stdout}")
     print(f"Quantization command stderr: {result.stderr}")
 
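Only cosmetic blank lines are dropped here; the step remains shell-out-and-inspect. The exit-code check that logically sits between the stderr print and the success message in the next hunk is not part of this diff, but would look roughly like this (paths are placeholder values):

    import subprocess

    # Placeholder paths; in app.py these come from fp16 / quantized_gguf_path.
    quantise_ggml = "./llama.cpp/quantize model.fp16.gguf model-iq4_xs-imat.gguf IQ4_XS"
    result = subprocess.run(quantise_ggml, shell=True, capture_output=True, text=True)
    if result.returncode != 0:
        # Surface llama.cpp's own error output instead of failing silently.
        raise Exception(f"Error quantizing: {result.stderr}")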
@@ -183,7 +168,6 @@ def process_model(model_id, q_method, private_repo, train_data_file, split_model
     print(f"Quantized successfully with {q_method} option!")
     print(f"Quantized model path: {quantized_gguf_path}")
 
-    # Create empty repo
     new_repo_url = api.create_repo(repo_id=f"{username}/{model_name}-{q_method}-imat.gguf", exist_ok=True, private=private_repo)
     new_repo_id = new_repo_url.repo_id
     print("Repo created successfully!", new_repo_url)
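The deleted "# Create empty repo" comment was arguably stale: with exist_ok=True, create_repo reuses an existing repo instead of always creating an empty one. A standalone sketch of the same huggingface_hub call (token and repo id are placeholders; RepoUrl.repo_id is available in recent huggingface_hub versions):

    from huggingface_hub import HfApi

    api = HfApi(token="hf_xxx")  # placeholder token
    new_repo_url = api.create_repo(
        repo_id="your-username/model-iq4_xs-imat.gguf",  # placeholder repo id
        exist_ok=True,   # no error if the repo already exists
        private=False,
    )
    new_repo_id = new_repo_url.repo_id  # create_repo returns a RepoUrl object
    print("Repo created successfully!", new_repo_url)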
@@ -239,7 +223,7 @@ def process_model(model_id, q_method, private_repo, train_data_file, split_model
     except Exception as e:
         raise Exception(f"Error uploading quantized model: {e}")
 
-
+    # Upload imatrix.dat if it exists
     imatrix_path = "llama.cpp/imatrix.dat"
     if os.path.isfile(imatrix_path):
         try:
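The added comment labels what the try block below it presumably does: once the quantized model is uploaded, push imatrix.dat to the same repo when it exists. A sketch of that upload with huggingface_hub (repo id is a placeholder; the exact call inside the block is not shown in this diff):

    import os
    from huggingface_hub import HfApi

    api = HfApi()  # assumes a token from a prior login or the HF_TOKEN env var
    imatrix_path = "llama.cpp/imatrix.dat"
    if os.path.isfile(imatrix_path):
        try:
            # Push the calibration matrix alongside the quantized model so
            # others can reproduce or re-quantize from it.
            api.upload_file(
                path_or_fileobj=imatrix_path,
                path_in_repo="imatrix.dat",
                repo_id="your-username/model-iq4_xs-imat.gguf",  # placeholder
            )
        except Exception as e:
            raise Exception(f"Error uploading imatrix.dat: {e}")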
 