SixOpen committed
Commit 4da3d6d
1 Parent(s): ca38009

Update app.py

Files changed (1): app.py (+12 -28)
app.py CHANGED
@@ -29,7 +29,7 @@ def script_to_use(model_id, api):
     return "convert.py" if arch in LLAMA_LIKE_ARCHS else "convert-hf-to-gguf.py"
 
 def generate_importance_matrix(model_path, train_data_path):
-    imatrix_command = f"./imatrix -m ../{model_path} -f {train_data_path} -ngl 0" #No GPU on the basic spaces unlike main, it works regardless but takes >2 hours
+    imatrix_command = f"./imatrix -m ../{model_path} -f {train_data_path} -ngl 0" #No GPU on the basic spaces unlike main, it works regardless but takes >2 hours
 
     os.chdir("llama.cpp")
 
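For context on this hunk: imatrix is llama.cpp's importance-matrix tool, and -ngl 0 disables GPU offload, as the inline comment notes. The rest of the function sits outside the hunk; a minimal sketch of how it presumably shells out, where the subprocess handling and the cwd restore are our assumptions rather than code from this commit:

    import os
    import subprocess

    def generate_importance_matrix(model_path, train_data_path):
        # -ngl 0 keeps every layer on the CPU: basic Spaces have no GPU,
        # so the run works but can take >2 hours.
        imatrix_command = f"./imatrix -m ../{model_path} -f {train_data_path} -ngl 0"

        os.chdir("llama.cpp")
        try:
            # Hypothetical continuation: shell out the same way the quantize
            # step below does, and fail loudly on a non-zero exit code.
            result = subprocess.run(imatrix_command, shell=True,
                                    capture_output=True, text=True)
            if result.returncode != 0:
                raise Exception(f"Error generating importance matrix: {result.stderr}")
        finally:
            os.chdir("..")  # restore the working directory for later steps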
@@ -134,32 +134,19 @@ def process_model(model_id, q_method, private_repo, train_data_file, split_model
     imatrix_path = "llama.cpp/imatrix.dat"
     use_imatrix = q_method.startswith("IQ")
 
-    if use_imatrix:
-        if train_data_file:
-
-            train_data_path = train_data_file.name
-
-
-            print(f"Training data file path: {train_data_path}")
-
-
-            if not os.path.isfile(train_data_path):
-                raise Exception(f"Training data file not found: {train_data_path}")
-        else:
-            # for now it's a decent fallback/default
-            train_data_path = "imatrix_calibration.txt"
-
-
-            print(f"Using fallback training data file: {train_data_path}")
-
-
-            if not os.path.isfile(train_data_path):
-                raise Exception(f"Fallback training data file not found: {train_data_path}")
+    if train_data_file and use_imatrix:
+
+        train_data_path = train_data_file.name
+
+
+        print(f"Training data file path: {train_data_path}")
+
+        if not os.path.isfile(train_data_path):
+            raise Exception(f"Training data file not found: {train_data_path}")
 
         generate_importance_matrix(fp16, train_data_path)
     else:
-        print("Not using imatrix quantization. Skipping importance matrix generation.")
-
+        print("No training data file provided or not using imatrix quantization.")
 
     username = whoami(oauth_token.token)["name"]
     quantized_gguf_name = f"{model_name.lower()}-{q_method.lower()}-imat.gguf"
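Net effect of this hunk: the nested "if use_imatrix: / if train_data_file:" checks collapse into a single guard, and the imatrix_calibration.txt fallback is removed, so IQ-type quantization now only builds an importance matrix when the user actually uploads calibration data. A condensed view of the new control flow (the wrapper function is purely illustrative, not part of app.py):

    import os

    def resolve_train_data_path(q_method, train_data_file):
        use_imatrix = q_method.startswith("IQ")  # IQ* methods need an importance matrix
        if train_data_file and use_imatrix:
            train_data_path = train_data_file.name  # uploaded file objects expose a .name path
            if not os.path.isfile(train_data_path):
                raise Exception(f"Training data file not found: {train_data_path}")
            return train_data_path
        # The old imatrix_calibration.txt fallback is gone: with no upload,
        # importance-matrix generation is skipped entirely.
        return None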
@@ -169,12 +156,10 @@ def process_model(model_id, q_method, private_repo, train_data_file, split_model
     else:
         quantise_ggml = f"./llama.cpp/quantize {fp16} {quantized_gguf_path} {q_method}"
 
-
     print(f"Quantization command: {quantise_ggml}")
 
     result = subprocess.run(quantise_ggml, shell=True, capture_output=True, text=True)
 
-
     print(f"Quantization command stdout: {result.stdout}")
     print(f"Quantization command stderr: {result.stderr}")
 
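Only cosmetic blank lines are dropped here; the step remains shell-out-and-inspect. The exit-code check that logically sits between the stderr print and the success message in the next hunk is not part of this diff, but would look roughly like this (paths are placeholder values):

    import subprocess

    # Placeholder paths; in app.py these come from fp16 / quantized_gguf_path.
    quantise_ggml = "./llama.cpp/quantize model.fp16.gguf model-iq4_xs-imat.gguf IQ4_XS"
    result = subprocess.run(quantise_ggml, shell=True, capture_output=True, text=True)
    if result.returncode != 0:
        # Surface llama.cpp's own error output instead of failing silently.
        raise Exception(f"Error quantizing: {result.stderr}")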
@@ -183,7 +168,6 @@ def process_model(model_id, q_method, private_repo, train_data_file, split_model
     print(f"Quantized successfully with {q_method} option!")
     print(f"Quantized model path: {quantized_gguf_path}")
 
-    # Create empty repo
     new_repo_url = api.create_repo(repo_id=f"{username}/{model_name}-{q_method}-imat.gguf", exist_ok=True, private=private_repo)
     new_repo_id = new_repo_url.repo_id
     print("Repo created successfully!", new_repo_url)
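The deleted "# Create empty repo" comment was arguably stale: with exist_ok=True, create_repo reuses an existing repo instead of always creating an empty one. A standalone sketch of the same huggingface_hub call (token and repo id are placeholders; RepoUrl.repo_id is available in recent huggingface_hub versions):

    from huggingface_hub import HfApi

    api = HfApi(token="hf_xxx")  # placeholder token
    new_repo_url = api.create_repo(
        repo_id="your-username/model-iq4_xs-imat.gguf",  # placeholder repo id
        exist_ok=True,   # no error if the repo already exists
        private=False,
    )
    new_repo_id = new_repo_url.repo_id  # create_repo returns a RepoUrl object
    print("Repo created successfully!", new_repo_url)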
@@ -239,7 +223,7 @@ def process_model(model_id, q_method, private_repo, train_data_file, split_model
     except Exception as e:
         raise Exception(f"Error uploading quantized model: {e}")
 
-
+    # Upload imatrix.dat if it exists
     imatrix_path = "llama.cpp/imatrix.dat"
     if os.path.isfile(imatrix_path):
         try:
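The added comment labels what the try block below it presumably does: once the quantized model is uploaded, push imatrix.dat to the same repo when it exists. A sketch of that upload with huggingface_hub (repo id is a placeholder; the exact call inside the block is not shown in this diff):

    import os
    from huggingface_hub import HfApi

    api = HfApi()  # assumes a token from a prior login or the HF_TOKEN env var
    imatrix_path = "llama.cpp/imatrix.dat"
    if os.path.isfile(imatrix_path):
        try:
            # Push the calibration matrix alongside the quantized model so
            # others can reproduce or re-quantize from it.
            api.upload_file(
                path_or_fileobj=imatrix_path,
                path_in_repo="imatrix.dat",
                repo_id="your-username/model-iq4_xs-imat.gguf",  # placeholder
            )
        except Exception as e:
            raise Exception(f"Error uploading imatrix.dat: {e}")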
 