Somunia committed on
Commit
dc779ad
1 Parent(s): 8bc6b74

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -3
app.py CHANGED
@@ -30,9 +30,6 @@ model = AutoModelForCausalLM.from_pretrained(
30
  # use_flash_attention_2=False
31
  ).to(torch.float32)
32
 
33
- model = model.quantize(8) # Quantize to int8 (experiment with different values)
34
- model = model.to("cpu")
35
-
36
  # Create a custom tokenizer (make sure to download vocab.json)
37
  tokenizer = AutoTokenizer.from_pretrained(
38
  model_path,
 
30
  # use_flash_attention_2=False
31
  ).to(torch.float32)
32
 
 
 
 
33
  # Create a custom tokenizer (make sure to download vocab.json)
34
  tokenizer = AutoTokenizer.from_pretrained(
35
  model_path,