import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import spaces

# Initialize the model and tokenizer once at startup
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "linjc16/Panacea-7B-Chat"
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
model.to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name)


@spaces.GPU
def generate_response(system_instruction, user_input):
    # Format the prompt using the chat-message structure
    messages = [
        {"role": "system", "content": system_instruction},
        {"role": "user", "content": user_input},
    ]
    model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)

    # Generate the model response
    with torch.no_grad():
        generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0].split("Bot:")[-1].strip()
    return response


# Gradio interface setup
with gr.Blocks() as demo:
    gr.Markdown("# Clinical Trial Chatbot")

    with gr.Row():
        # Left column for inputs
        with gr.Column():
            system_instruction = gr.Textbox(
                placeholder="Enter system instruction here...",
                label="System Instruction")
            user_input = gr.Textbox(
                placeholder="Type your message here...",
                label="Your Message")
            submit_btn = gr.Button("Submit")

        # Right column for displaying the bot response
        with gr.Column():
            response_display = gr.Textbox(
                label="Bot Response",
                interactive=False,
                placeholder="Response will appear here.")

    # Link the submit button to the generate_response function
    submit_btn.click(generate_response, [system_instruction, user_input], response_display)

# Launch the app
demo.launch(share=True)
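
# --- Optional: querying the running app programmatically ---
# A minimal sketch using gradio_client, kept commented out because
# demo.launch() above blocks until the server shuts down. It assumes the app
# is reachable at the local URL Gradio prints on launch; the
# "/generate_response" endpoint name is an assumption based on Gradio's
# default of naming API routes after the wired function.
#
# from gradio_client import Client
#
# client = Client("http://127.0.0.1:7860")
# result = client.predict(
#     "You are a helpful clinical trial assistant.",   # system_instruction
#     "What are the phases of a clinical trial?",      # user_input
#     api_name="/generate_response",
# )
# print(result)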