Spaces:
Running
on
Zero
Running
on
Zero
Daemontatox
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -123,10 +123,10 @@ h3 {
|
|
123 |
def initialize_model():
|
124 |
"""Initialize the model with appropriate configurations"""
|
125 |
quantization_config = BitsAndBytesConfig(
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
)
|
131 |
|
132 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID , trust_remote_code=True)
|
@@ -139,7 +139,7 @@ def initialize_model():
|
|
139 |
device_map="cuda",
|
140 |
# attn_implementation="flash_attention_2",
|
141 |
trust_remote_code=True,
|
142 |
-
|
143 |
|
144 |
)
|
145 |
|
|
|
123 |
def initialize_model():
|
124 |
"""Initialize the model with appropriate configurations"""
|
125 |
quantization_config = BitsAndBytesConfig(
|
126 |
+
load_in_8bit=True,
|
127 |
+
bnb_8bit_compute_dtype=torch.bfloat16,
|
128 |
+
bnb_8bit_quant_type="nf4",
|
129 |
+
bnb_8bit_use_double_quant=True
|
130 |
)
|
131 |
|
132 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID , trust_remote_code=True)
|
|
|
139 |
device_map="cuda",
|
140 |
# attn_implementation="flash_attention_2",
|
141 |
trust_remote_code=True,
|
142 |
+
quantization_config=quantization_config
|
143 |
|
144 |
)
|
145 |
|