# ChatDoctor / app.py
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
import gradio as gr
# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_id = "Narrativaai/BioGPT-Large-finetuned-chatdoctor"
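# The tokenizer is loaded from the base BioGPT-Large model that the ChatDoctor checkpoint was fine-tuned from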
tokenizer = AutoTokenizer.from_pretrained("microsoft/BioGPT-Large")
model = AutoModelForCausalLM.from_pretrained(model_id)
# Move the model to the device
model = model.to(device)
model.eval() # Set the model to evaluation mode
def answer_question(
    prompt,
    temperature=0.1,
    top_p=0.75,
    top_k=40,
    num_beams=2,
    do_sample=True,
    **kwargs,
):
    with torch.no_grad():  # Disable gradient calculation
        inputs = tokenizer(prompt, return_tensors="pt")
        # Move the inputs to the device
        inputs = {key: val.to(device) for key, val in inputs.items()}
        input_ids = inputs["input_ids"]
        attention_mask = inputs["attention_mask"]

        generation_config = GenerationConfig(
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            num_beams=num_beams,
            do_sample=do_sample,
            **kwargs,
        )

        generation_output = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            generation_config=generation_config,
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=512,
            eos_token_id=tokenizer.eos_token_id,
        )

        s = generation_output.sequences[0]
        output = tokenizer.decode(s, skip_special_tokens=True)
        return output.split(" Response:")[1]
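
# Usage sketch (assumption: the prompt follows the same template that gui_interface
# builds below). answer_question expects a "### Response:" marker in the prompt,
# since it splits the decoded text on " Response:" to isolate the model's answer, e.g.
#   answer_question("### Instruction:\n...\n### Input:\n...\n### Response:\n")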
def gui_interface(prompt):
    # Wrap the user's question in the instruction/input/response template
    prompt = """
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
If you are a doctor, please answer the medical questions based on the patient's description.
### Input:
""" + prompt + """
### Response:
"""
    return answer_question(prompt)
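
# Minimal Gradio text-in / text-out UI wired to the templated question-answering function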
iface = gr.Interface(fn=gui_interface, inputs="text", outputs="text")
iface.launch()