pminervini committed on
Commit 98602d4 · 1 Parent(s): ee28881
Files changed (1)
  1. app.py +2 -2
app.py CHANGED
@@ -121,8 +121,8 @@ def rag_pipeline(prompt, index="pubmed", num_docs=3, model_name="HuggingFaceH4/z
         print('OAI_RESPONSE', openai_res)
         response = openai_res.choices[0].message.content.strip()
     else:
-        quantization_config = BitsAndBytesConfig(load_in_4bit=True)
-        model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto", low_cpu_mem_usage=True, quantization_config=quantization_config)
+        quantization_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)
+        model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", low_cpu_mem_usage=True, quantization_config=quantization_config)
         tokenizer = AutoTokenizer.from_pretrained(model_name)

         # Load your language model from HuggingFace Transformers
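Context for the change: with load_in_4bit=True, the torch_dtype passed to from_pretrained only sets the dtype of the modules that are not quantized, while the compute dtype of the 4-bit layers is controlled by bnb_4bit_compute_dtype (which defaults to torch.float32). Moving bfloat16 into the BitsAndBytesConfig therefore makes the quantized matmuls actually run in bfloat16. The sketch below shows the post-commit loading pattern in isolation; it assumes a CUDA GPU with transformers, accelerate, and bitsandbytes installed, and the model id is only an illustrative stand-in for the (truncated) default in rag_pipeline.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Illustrative repo id; the actual default in app.py is truncated in the diff header above.
model_name = "HuggingFaceH4/zephyr-7b-beta"

# 4-bit weight quantization, with the quantized layers computing in bfloat16
# rather than the float32 default.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    low_cpu_mem_usage=True,
    quantization_config=quantization_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Smoke test: generate a short completion to confirm the model loaded.
inputs = tokenizer("What is retrieval-augmented generation?", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))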