"""Gradio chat UI for the shaneperry0101/Health-Llama-3.2-1B text-generation model."""

import time

import gradio as gr
import torch
import transformers
from transformers import AutoTokenizer

# Hugging Face hub id of the fine-tuned health-domain Llama model.
MODEL_ID = "shaneperry0101/Health-Llama-3.2-1B"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Named `generator` so it does not shadow `transformers.pipeline` itself.
generator = transformers.pipeline(
    "text-generation",
    model=MODEL_ID,
    torch_dtype=torch.float16,
    device_map="auto",
)

SYSTEM_MESSAGE = (
    "Welcome! You're now communicating with an AI model trained to assist with "
    "information about general health disease. Feel free to ask about causes, "
    "symptoms, medications, and treatment options!"
)


def chat_response(message):
    """Generate a reply for *message* using the health-assistant model.

    Returns the generated text with the prompt scaffolding stripped, or an
    empty string if the pipeline produced no sequences.
    """
    # Llama-style chat prompt.  BUG FIX: the original wrapped `prompt` in a
    # second "[INST] ... [/INST]" layer when calling the pipeline, producing
    # nested instruction tags; the prompt is now sent exactly once.
    # NOTE(review): the original used bare "<>" markers; the Llama chat format
    # uses "<<SYS>>"/"<</SYS>>" -- confirm against this model's card/template.
    prompt = f"[INST] <<SYS>>\n{SYSTEM_MESSAGE}\n<</SYS>>\n\n{message}[/INST]"
    sequences = generator(
        prompt,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=500,  # total token budget: prompt + completion
    )
    if not sequences:
        return ""
    # num_return_sequences=1, so only the first sequence matters: drop the
    # echoed prompt, then keep the text after the final "[/INST]" marker.
    generated = sequences[0]["generated_text"]
    return generated.replace(prompt, "").split("[/INST]")[-1]


with gr.Blocks() as demo:
    chatbot = gr.Chatbot(label="Personal Health Assistant")
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        """Textbox submit handler: append the (user, bot) turn, clear the box."""
        bot_message = chat_response(message)
        chat_history.append((message, bot_message))
        time.sleep(2)  # brief pause before the history update is rendered
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])

if __name__ == "__main__":
    demo.launch()