Ffftdtd5dtft committed on
Commit
106dcad
1 Parent(s): bc1d53e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -26
app.py CHANGED
@@ -3,7 +3,7 @@ import shutil
3
  import subprocess
4
  import torch
5
  from transformers import AutoConfig, AutoModelForCausalLM
6
- from huggingface_hub import HfApi, snapshot_download, whoami, ModelCard
7
  from gradio_huggingfacehub_search import HuggingfaceHubSearch
8
  from apscheduler.schedulers.background import BackgroundScheduler
9
  from textwrap import dedent
@@ -272,31 +272,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
272
  if result.returncode != 0:
273
  raise Exception(f"Error converting to fp16: {result.stderr}")
274
 
275
- config = AutoConfig.from_pretrained(model_name)
276
- model = AutoModelForCausalLM.from_pretrained(model_name, config=config, torch_dtype=torch.float16)
277
-
278
- model = optimize_model_resources(model)
279
- model = apply_quantization(model, use_int8_inference=True)
280
- model = reduce_layers(model, reduction_factor=0.5)
281
- model = use_smaller_embeddings(model, reduction_factor=0.75)
282
- model = use_fp16_embeddings(model)
283
- model = quantize_embeddings(model)
284
- model = use_bnb_f16(model)
285
- model = use_group_quantization(model)
286
- model = apply_layer_norm_trick(model)
287
- model = use_selective_quantization(model)
288
- model = use_mixed_precision(model)
289
- model = use_pruning_after_training(model, prune_amount=0.1)
290
- model = use_weight_sharing(model)
291
- model = use_low_rank_approximation(model, rank_factor=0.5)
292
- model = use_quantization_aware_training(model)
293
- model = use_gradient_checkpointing(model)
294
- model = use_channel_pruning(model, prune_amount=0.1)
295
- model = use_sparse_tensors(model, sparsity_threshold=0.01)
296
- model = use_hashing_trick(model, num_hashes=1024)
297
-
298
- model.save_pretrained(model_name)
299
-
300
  imatrix_path = "llama.cpp/imatrix.dat"
301
  if use_imatrix:
302
  if train_data_file:
 
3
  import subprocess
4
  import torch
5
  from transformers import AutoConfig, AutoModelForCausalLM
6
+ from huggingface_hub import HfApi, whoami, ModelCard
7
  from gradio_huggingfacehub_search import HuggingfaceHubSearch
8
  from apscheduler.schedulers.background import BackgroundScheduler
9
  from textwrap import dedent
 
272
  if result.returncode != 0:
273
  raise Exception(f"Error converting to fp16: {result.stderr}")
274
 
275
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  imatrix_path = "llama.cpp/imatrix.dat"
277
  if use_imatrix:
278
  if train_data_file: