gguf-my-repo

Runtime error

App Files Community

Ffftdtd5dtft committed on Sep 4, 2024

Commit

106dcad

verified ·

1 Parent(s): bc1d53e

Update app.py

Browse files

Files changed (1) hide show

app.py +2 -26

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import shutil
 import subprocess
 import torch
 from transformers import AutoConfig, AutoModelForCausalLM
-from huggingface_hub import HfApi, snapshot_download, whoami, ModelCard
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
 from apscheduler.schedulers.background import BackgroundScheduler
 from textwrap import dedent
@@ -272,31 +272,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
         if result.returncode != 0:
             raise Exception(f"Error converting to fp16: {result.stderr}")
-        config = AutoConfig.from_pretrained(model_name)
-        model = AutoModelForCausalLM.from_pretrained(model_name, config=config, torch_dtype=torch.float16)
-        model = optimize_model_resources(model)
-        model = apply_quantization(model, use_int8_inference=True)
-        model = reduce_layers(model, reduction_factor=0.5)
-        model = use_smaller_embeddings(model, reduction_factor=0.75)
-        model = use_fp16_embeddings(model)
-        model = quantize_embeddings(model)
-        model = use_bnb_f16(model)
-        model = use_group_quantization(model)
-        model = apply_layer_norm_trick(model)
-        model = use_selective_quantization(model)
-        model = use_mixed_precision(model)
-        model = use_pruning_after_training(model, prune_amount=0.1)
-        model = use_weight_sharing(model)
-        model = use_low_rank_approximation(model, rank_factor=0.5)
-        model = use_quantization_aware_training(model)
-        model = use_gradient_checkpointing(model)
-        model = use_channel_pruning(model, prune_amount=0.1)
-        model = use_sparse_tensors(model, sparsity_threshold=0.01)
-        model = use_hashing_trick(model, num_hashes=1024)
-        model.save_pretrained(model_name)
         imatrix_path = "llama.cpp/imatrix.dat"
         if use_imatrix:
             if train_data_file:

 import subprocess
 import torch
 from transformers import AutoConfig, AutoModelForCausalLM
+from huggingface_hub import HfApi, whoami, ModelCard
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
 from apscheduler.schedulers.background import BackgroundScheduler
 from textwrap import dedent
         if result.returncode != 0:
             raise Exception(f"Error converting to fp16: {result.stderr}")
         imatrix_path = "llama.cpp/imatrix.dat"
         if use_imatrix:
             if train_data_file: