balkite committed on
Commit
7bbc6cc
1 Parent(s): 38f1be1

Update backend.py

Browse files
Files changed (1) hide show
  1. backend.py +16 -16
backend.py CHANGED
@@ -1,6 +1,6 @@
1
  import time
2
  import torch
3
- from auto_gptq import AutoGPTQForCausalLM
4
  from huggingface_hub import hf_hub_download
5
  from langchain.chains import RetrievalQA
6
  from langchain.embeddings import HuggingFaceInstructEmbeddings
@@ -34,21 +34,21 @@ def load_model(model_id, model_basename=None):
34
  "n_batch": max_ctx_size}
35
  return LlamaCpp(**kwargs)
36
 
37
- else:
38
- if ".safetensors" in model_basename:
39
- model_basename = model_basename.replace(".safetensors", "")
40
-
41
- tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
42
-
43
- model = AutoGPTQForCausalLM.from_quantized(
44
- model_id,
45
- model_basename=model_basename,
46
- use_safetensors=True,
47
- trust_remote_code=True,
48
- device="cuda:0",
49
- use_triton=False,
50
- quantize_config=None,
51
- )
52
  else:
53
  tokenizer = AutoTokenizer.from_pretrained(model_id)
54
 
 
1
  import time
2
  import torch
3
+ # from auto_gptq import AutoGPTQForCausalLM
4
  from huggingface_hub import hf_hub_download
5
  from langchain.chains import RetrievalQA
6
  from langchain.embeddings import HuggingFaceInstructEmbeddings
 
34
  "n_batch": max_ctx_size}
35
  return LlamaCpp(**kwargs)
36
 
37
+ # else:
38
+ # if ".safetensors" in model_basename:
39
+ # model_basename = model_basename.replace(".safetensors", "")
40
+
41
+ # tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
42
+
43
+ # model = AutoGPTQForCausalLM.from_quantized(
44
+ # model_id,
45
+ # model_basename=model_basename,
46
+ # use_safetensors=True,
47
+ # trust_remote_code=True,
48
+ # device="cuda:0",
49
+ # use_triton=False,
50
+ # quantize_config=None,
51
+ # )
52
  else:
53
  tokenizer = AutoTokenizer.from_pretrained(model_id)
54