|
import gradio as gr |
|
import torch |
|
from transformers import AutoModelForCausalLM, AutoTokenizer |
|
import spaces |
|
|
|
|
|
# Prefer a CUDA GPU when one is present; otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

model_name = "linjc16/Panacea-7B-Chat"

# The tokenizer is cheap to load, so fetch it first.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the model weights in half precision and move them onto the chosen
# device in one chained call (`.to` returns the model itself).
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16).to(device)
|
|
|
@spaces.GPU
def generate_response(system_instruction, user_input):
    """Generate one chat reply from the Panacea model.

    Args:
        system_instruction: System prompt that frames the assistant's behavior.
        user_input: The user's message for this turn.

    Returns:
        The model's reply, decoded to a stripped string.
    """
    messages = [
        {"role": "system", "content": system_instruction},
        {"role": "user", "content": user_input},
    ]

    # apply_chat_template already returns a tensor; move it to the target
    # device once. (The original moved the same tensor twice.)
    model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)

    # Inference only — no gradients needed. do_sample=True makes output
    # non-deterministic by design.
    with torch.no_grad():
        generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)

    # Decode only the newly generated tokens. Slicing off the prompt length
    # is robust regardless of how the chat template renders roles, whereas
    # splitting the full decode on a literal "Bot:" marker silently returns
    # the entire prompt when that marker never appears in the output.
    new_tokens = generated_ids[:, model_inputs.shape[1]:]
    response = tokenizer.batch_decode(new_tokens, skip_special_tokens=True)[0].strip()

    return response
|
|
|
|
|
# Build the Gradio UI: a two-column layout with the inputs on the left and
# the model's reply on the right.
with gr.Blocks() as demo:

    gr.Markdown("# Clinical Trial Chatbot")

    with gr.Row():

        # Left column: system instruction, user message, and a submit button.
        with gr.Column():

            system_instruction = gr.Textbox(

                placeholder="Enter system instruction here...", label="System Instruction")

            user_input = gr.Textbox(

                placeholder="Type your message here...", label="Your Message")

            submit_btn = gr.Button("Submit")

        # Right column: read-only textbox that shows the generated response.
        with gr.Column():

            response_display = gr.Textbox(

                label="Bot Response", interactive=False, placeholder="Response will appear here.")

    # Wire the button: clicking runs generate_response on the two input
    # textboxes and writes its return value into the response box.
    submit_btn.click(generate_response, [system_instruction, user_input], response_display)

# share=True asks Gradio to expose a temporary public share link in addition
# to the local server.
demo.launch(share=True)
|
|