pminervini committed on
Commit 98602d4 · 1 Parent(s): ee28881
Files changed (1)
  1. app.py +2 -2
app.py CHANGED
@@ -121,8 +121,8 @@ def rag_pipeline(prompt, index="pubmed", num_docs=3, model_name="HuggingFaceH4/z
         print('OAI_RESPONSE', openai_res)
         response = openai_res.choices[0].message.content.strip()
     else:
-        quantization_config = BitsAndBytesConfig(load_in_4bit=True)
-        model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto", low_cpu_mem_usage=True, quantization_config=quantization_config)
+        quantization_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)
+        model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", low_cpu_mem_usage=True, quantization_config=quantization_config)
         tokenizer = AutoTokenizer.from_pretrained(model_name)

         # Load your language model from HuggingFace Transformers
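Context for the change: with load_in_4bit=True, the torch_dtype passed to from_pretrained only sets the dtype of the modules that are not quantized, while the compute dtype of the 4-bit layers is controlled by bnb_4bit_compute_dtype (which defaults to torch.float32). Moving bfloat16 into the BitsAndBytesConfig therefore makes the quantized matmuls actually run in bfloat16. The sketch below shows the post-commit loading pattern in isolation; it assumes a CUDA GPU with transformers, accelerate, and bitsandbytes installed, and the model id is only an illustrative stand-in for the (truncated) default in rag_pipeline.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Illustrative repo id; the actual default in app.py is truncated in the diff header above.
model_name = "HuggingFaceH4/zephyr-7b-beta"

# 4-bit weight quantization, with the quantized layers computing in bfloat16
# rather than the float32 default.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    low_cpu_mem_usage=True,
    quantization_config=quantization_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Smoke test: generate a short completion to confirm the model loaded.
inputs = tokenizer("What is retrieval-augmented generation?", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))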