pminervini committed
Commit · 98602d4
1 Parent(s): ee28881
update
app.py CHANGED
@@ -121,8 +121,8 @@ def rag_pipeline(prompt, index="pubmed", num_docs=3, model_name="HuggingFaceH4/z
         print('OAI_RESPONSE', openai_res)
         response = openai_res.choices[0].message.content.strip()
     else:
-        quantization_config = BitsAndBytesConfig(load_in_4bit=True)
-        model = AutoModelForCausalLM.from_pretrained(model_name,
+        quantization_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)
+        model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", low_cpu_mem_usage=True, quantization_config=quantization_config)
         tokenizer = AutoTokenizer.from_pretrained(model_name)
 
         # Load your language model from HuggingFace Transformers
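For context, the two added lines follow the standard Transformers + bitsandbytes pattern for loading a causal LM in 4-bit. The sketch below spells that pattern out under a few assumptions: the model id, the prompt, and the generate() call are illustrative placeholders (the diff truncates the actual model_name default), while the BitsAndBytesConfig and from_pretrained arguments mirror the committed lines.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Illustrative model id: the diff truncates the real default value of model_name.
model_name = "HuggingFaceH4/zephyr-7b-beta"

# Mirrors the committed lines: 4-bit weights, bfloat16 compute dtype.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",            # let accelerate place layers on available devices
    low_cpu_mem_usage=True,       # avoid materialising a full-precision copy in host RAM first
    quantization_config=quantization_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Illustrative usage only, not taken from app.py:
inputs = tokenizer("What is retrieval-augmented generation?", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Setting bnb_4bit_compute_dtype=torch.bfloat16 only changes the dtype used for the dequantized matmuls (the default is float32); the weights themselves stay in 4-bit, and device_map="auto" lets accelerate spread the layers across whatever GPU/CPU memory the Space has available.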