cemt committed on
Commit
6aa71ed
1 Parent(s): 50f73af

Change quantization

Browse files
Files changed (1) hide show
  1. app.py +1 -2
app.py CHANGED
@@ -100,8 +100,7 @@ def predict(
100
  # Load model
101
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
102
  quantization_config = BitsAndBytesConfig(
103
- # load_in_8bit=True,
104
- bnb_4bit_compute_dtype=torch.bfloat16
105
  )
106
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
107
  model = AutoModelForCausalLM.from_pretrained(
 
100
  # Load model
101
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
102
  quantization_config = BitsAndBytesConfig(
103
+ load_in_8bit=False, bnb_4bit_compute_dtype=torch.bfloat16
 
104
  )
105
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
106
  model = AutoModelForCausalLM.from_pretrained(