import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
import gradio as gr

# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_id = "Narrativaai/BioGPT-Large-finetuned-chatdoctor"

tokenizer = AutoTokenizer.from_pretrained("microsoft/BioGPT-Large")
model = AutoModelForCausalLM.from_pretrained(model_id)

# Move the model to the device
model = model.to(device)
model.eval()  # Set the model to evaluation mode


def answer_question(
    prompt,
    temperature=0.1,
    top_p=0.75,
    top_k=40,
    num_beams=2,
    do_sample=True,
    **kwargs,
):
    with torch.no_grad():  # Disable gradient calculation
        inputs = tokenizer(prompt, return_tensors="pt")
        # Move the inputs to the device
        inputs = {key: val.to(device) for key, val in inputs.items()}
        input_ids = inputs["input_ids"]
        attention_mask = inputs["attention_mask"]

        generation_config = GenerationConfig(
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            num_beams=num_beams,
            do_sample=do_sample,
            **kwargs,
        )

        generation_output = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            generation_config=generation_config,
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=512,
            eos_token_id=tokenizer.eos_token_id,
        )

        s = generation_output.sequences[0]
        output = tokenizer.decode(s, skip_special_tokens=True)
        # Keep only the text generated after the "### Response:" marker
        return output.split(" Response:")[1]


def gui_interface(prompt):
    # Wrap the user's question in the instruction template the model was fine-tuned on
    full_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
If you are a doctor, please answer the medical questions based on the patient's description.

### Input:
""" + prompt + """

### Response:
"""
    return answer_question(full_prompt)


iface = gr.Interface(fn=gui_interface, inputs="text", outputs="text")
iface.launch()
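
# A minimal sketch of calling answer_question() directly, without the Gradio UI.
# The patient question below is purely illustrative. Note that iface.launch()
# above blocks the script, so uncomment and run this before launching (or in a
# separate session where the model and helper are already defined).
#
# test_prompt = (
#     "Below is an instruction that describes a task, paired with an input that "
#     "provides further context. Write a response that appropriately completes the request.\n\n"
#     "### Instruction:\nIf you are a doctor, please answer the medical questions "
#     "based on the patient's description.\n\n"
#     "### Input:\nI have had a dry cough for two weeks. Should I be worried?\n\n"
#     "### Response:\n"
# )
# print(answer_question(test_prompt))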