CoderCowMoo committed on
Commit
d23fc4b
•
1 Parent(s): cdcdc4e

disable exllamav2 and use marlin

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -16,13 +16,14 @@ tokenizer = AutoTokenizer.from_pretrained(model_id)
16
  quantize_config = BaseQuantizeConfig(
17
  bits=4,
18
  group_size=128,
19
- desc_act=False,
20
- use_exllama=False
21
  )
22
  model = AutoGPTQForCausalLM.from_quantized(
23
  model_id,
24
  device="cuda:0",
25
  use_safetensors=True,
 
 
26
  quantize_config=quantize_config).eval()
27
 
28
 
 
16
  quantize_config = BaseQuantizeConfig(
17
  bits=4,
18
  group_size=128,
19
+ desc_act=False
 
20
  )
21
  model = AutoGPTQForCausalLM.from_quantized(
22
  model_id,
23
  device="cuda:0",
24
  use_safetensors=True,
25
+ disable_exllamav2=True,
26
+ use_marlin=True,
27
  quantize_config=quantize_config).eval()
28
 
29