import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import spaces

# Initialize the model and tokenizer once at startup
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "linjc16/Panacea-7B-Chat"
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
model.to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name)


@spaces.GPU
def generate_response(system_instruction, user_input):
    # Format the prompt using the chat-message structure
    messages = [
        {"role": "system", "content": system_instruction},
        {"role": "user", "content": user_input},
    ]
    model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)

    # Generate the model response
    with torch.no_grad():
        generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0].split("Bot:")[-1].strip()
    return response


# Gradio interface setup
with gr.Blocks() as demo:
    gr.Markdown("# Clinical Trial Chatbot")

    with gr.Row():
        # Left column for inputs
        with gr.Column():
            system_instruction = gr.Textbox(
                placeholder="Enter system instruction here...",
                label="System Instruction")
            user_input = gr.Textbox(
                placeholder="Type your message here...",
                label="Your Message")
            submit_btn = gr.Button("Submit")

        # Right column for displaying the bot response
        with gr.Column():
            response_display = gr.Textbox(
                label="Bot Response",
                interactive=False,
                placeholder="Response will appear here.")

    # Link the submit button to the generate_response function
    submit_btn.click(generate_response, [system_instruction, user_input], response_display)

# Launch the app
demo.launch(share=True)
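
# --- Optional: querying the running app programmatically ---
# A minimal sketch using gradio_client, kept commented out because
# demo.launch() above blocks until the server shuts down. It assumes the app
# is reachable at the local URL Gradio prints on launch; the
# "/generate_response" endpoint name is an assumption based on Gradio's
# default of naming API routes after the wired function.
#
# from gradio_client import Client
#
# client = Client("http://127.0.0.1:7860")
# result = client.predict(
#     "You are a helpful clinical trial assistant.",   # system_instruction
#     "What are the phases of a clinical trial?",      # user_input
#     api_name="/generate_response",
# )
# print(result)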