Spaces:
Running
Running
Ffftdtd5dtft
committed on
Commit
•
106dcad
1
Parent(s):
bc1d53e
Update app.py
Browse files
app.py
CHANGED
@@ -3,7 +3,7 @@ import shutil
|
|
3 |
import subprocess
|
4 |
import torch
|
5 |
from transformers import AutoConfig, AutoModelForCausalLM
|
6 |
-
from huggingface_hub import HfApi,
|
7 |
from gradio_huggingfacehub_search import HuggingfaceHubSearch
|
8 |
from apscheduler.schedulers.background import BackgroundScheduler
|
9 |
from textwrap import dedent
|
@@ -272,31 +272,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
|
|
272 |
if result.returncode != 0:
|
273 |
raise Exception(f"Error converting to fp16: {result.stderr}")
|
274 |
|
275 |
-
|
276 |
-
model = AutoModelForCausalLM.from_pretrained(model_name, config=config, torch_dtype=torch.float16)
|
277 |
-
|
278 |
-
model = optimize_model_resources(model)
|
279 |
-
model = apply_quantization(model, use_int8_inference=True)
|
280 |
-
model = reduce_layers(model, reduction_factor=0.5)
|
281 |
-
model = use_smaller_embeddings(model, reduction_factor=0.75)
|
282 |
-
model = use_fp16_embeddings(model)
|
283 |
-
model = quantize_embeddings(model)
|
284 |
-
model = use_bnb_f16(model)
|
285 |
-
model = use_group_quantization(model)
|
286 |
-
model = apply_layer_norm_trick(model)
|
287 |
-
model = use_selective_quantization(model)
|
288 |
-
model = use_mixed_precision(model)
|
289 |
-
model = use_pruning_after_training(model, prune_amount=0.1)
|
290 |
-
model = use_weight_sharing(model)
|
291 |
-
model = use_low_rank_approximation(model, rank_factor=0.5)
|
292 |
-
model = use_quantization_aware_training(model)
|
293 |
-
model = use_gradient_checkpointing(model)
|
294 |
-
model = use_channel_pruning(model, prune_amount=0.1)
|
295 |
-
model = use_sparse_tensors(model, sparsity_threshold=0.01)
|
296 |
-
model = use_hashing_trick(model, num_hashes=1024)
|
297 |
-
|
298 |
-
model.save_pretrained(model_name)
|
299 |
-
|
300 |
imatrix_path = "llama.cpp/imatrix.dat"
|
301 |
if use_imatrix:
|
302 |
if train_data_file:
|
|
|
3 |
import subprocess
|
4 |
import torch
|
5 |
from transformers import AutoConfig, AutoModelForCausalLM
|
6 |
+
from huggingface_hub import HfApi, whoami, ModelCard
|
7 |
from gradio_huggingfacehub_search import HuggingfaceHubSearch
|
8 |
from apscheduler.schedulers.background import BackgroundScheduler
|
9 |
from textwrap import dedent
|
|
|
272 |
if result.returncode != 0:
|
273 |
raise Exception(f"Error converting to fp16: {result.stderr}")
|
274 |
|
275 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
276 |
imatrix_path = "llama.cpp/imatrix.dat"
|
277 |
if use_imatrix:
|
278 |
if train_data_file:
|