Daemontatox committed on
Commit
3ccf63d
·
verified ·
1 Parent(s): f5773a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -123,10 +123,10 @@ h3 {
123
  def initialize_model():
124
  """Initialize the model with appropriate configurations"""
125
  quantization_config = BitsAndBytesConfig(
126
- load_in_4bit=True,
127
- bnb_4bit_compute_dtype=torch.bfloat16,
128
- bnb_4bit_quant_type="nf4",
129
- bnb_4bit_use_double_quant=True
130
  )
131
 
132
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID , trust_remote_code=True)
@@ -139,7 +139,7 @@ def initialize_model():
139
  device_map="cuda",
140
  # attn_implementation="flash_attention_2",
141
  trust_remote_code=True,
142
- #quantization_config=quantization_config
143
 
144
  )
145
 
 
123
  def initialize_model():
124
  """Initialize the model with appropriate configurations"""
125
  quantization_config = BitsAndBytesConfig(
126
+ load_in_8bit=True,
127
+ bnb_8bit_compute_dtype=torch.bfloat16,
128
+ bnb_8bit_quant_type="nf4",
129
+ bnb_8bit_use_double_quant=True
130
  )
131
 
132
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID , trust_remote_code=True)
 
139
  device_map="cuda",
140
  # attn_implementation="flash_attention_2",
141
  trust_remote_code=True,
142
+ quantization_config=quantization_config
143
 
144
  )
145