minhdang committed on
Commit
76de27f
1 Parent(s): 175bb86

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -24,12 +24,12 @@ model_id = "Nexusflow/Starling-LM-7B-beta"
24
  tokenizer = AutoTokenizer.from_pretrained(model_id)
25
  model = AutoModelForCausalLM.from_pretrained(model_id,
26
  # load_in_8bit=True,
27
- # quantization_config=nf4_config,
28
  torch_dtype = torch.bfloat16,
29
  # device_map="auto"
30
  )
31
 
32
- replace_linears_in_hf(model)
33
  model.to('cuda').eval()
34
  @spaces.GPU
35
  def generate_response(user_input, max_new_tokens, temperature):
 
24
  tokenizer = AutoTokenizer.from_pretrained(model_id)
25
  model = AutoModelForCausalLM.from_pretrained(model_id,
26
  # load_in_8bit=True,
27
+ quantization_config=nf4_config,
28
  torch_dtype = torch.bfloat16,
29
  # device_map="auto"
30
  )
31
 
32
+ # replace_linears_in_hf(model)
33
  model.to('cuda').eval()
34
  @spaces.GPU
35
  def generate_response(user_input, max_new_tokens, temperature):