# stablemed2 / app.py
# vaishakgkumar's picture
# Update app.py
# f8d2e9f
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from peft import PeftModel, PeftConfig
import gradio as gr
import os
import huggingface
from huggingface_hub import login
# Authenticate against the Hugging Face Hub with the token from the environment.
# The login is skipped when HUGGINGFACE_TOKEN is unset: login(None) would fall
# back to an interactive prompt, which a headless app cannot answer.
hf_token = os.environ.get('HUGGINGFACE_TOKEN')
if hf_token:
    login(hf_token)

# Define the device: use the GPU when one is available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the tokenizer and the base model, then wrap the base model with the
# PEFT adapter weights published under "vaishakgkumar/stablemedv1".
tokenizer = AutoTokenizer.from_pretrained('stabilityai/stablelm-3b-4e1t', token=hf_token, trust_remote_code=True)
# The adapter config is loaded but not used by any code visible here; the name
# is kept in case other code refers to it.
config = PeftConfig.from_pretrained("vaishakgkumar/stablemedv1")
model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-3b-4e1t", token=hf_token, trust_remote_code=True)
model = PeftModel.from_pretrained(model, "vaishakgkumar/stablemedv1")
model.to(device)
# This script only generates text, so put the model in evaluation mode explicitly.
model.eval()
class ChatBot:
    """Wrap the module-level ``tokenizer`` and ``model`` behind a predict call."""

    def __init__(self):
        # Initialised empty; predict() neither reads nor writes it.
        self.history = []

    def predict(self, user_input, system_prompt="You are an expert analyst and provide assessment:"):
        """Generate the model's reply to a single user message.

        Args:
            user_input: The text entered by the user.
            system_prompt: Instruction text appended after the user text.

        Returns:
            The decoded text of the newly generated tokens only, without the
            prompt and without special tokens.
        """
        # NOTE(review): the default system_prompt already ends with ":", so the
        # message ends in "::". Left unchanged so the model input stays the same.
        prompt = [{'role': 'user', 'content': user_input + "\n" + system_prompt + ":"}]
        inputs = tokenizer.apply_chat_template(
            prompt,
            add_generation_prompt=True,
            return_tensors='pt',
        ).to(model.device)

        # Greedy decoding. The former temperature=0.8 argument was dropped
        # because it has no effect when do_sample=False.
        # input_ids is passed by keyword so the call does not depend on the
        # wrapper's generate() accepting positional arguments.
        tokens = model.generate(
            input_ids=inputs,
            max_new_tokens=250,
            do_sample=False,
        )

        # The returned sequence starts with the prompt tokens; slice them off
        # so the caller sees only the model's answer, not an echo of the prompt.
        prompt_length = inputs.shape[-1]
        generated = tokens[0][prompt_length:]
        response_text = tokenizer.decode(generated, skip_special_tokens=True).strip()

        # Free up memory: drop the tensor references before asking CUDA to
        # release its cached blocks.
        del inputs, tokens, generated
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        return response_text
# One chatbot instance backs the web interface.
bot = ChatBot()

# Page heading and (effectively blank) description text.
title = "👋🏻Welcome to StableLM MED chat"
description = "\n"

# A single example row; its two entries fill the two text inputs in order.
examples = [
    [
        "What is the proper treatment for buccal herpes?",
        "Please provide information on the most effective antiviral medications and home remedies for treating buccal herpes.",
    ],
]

# Collect the interface options first, then build and start the app.
interface_options = {
    "fn": bot.predict,
    "inputs": ["text", "text"],
    "outputs": "text",
    "title": title,
    "description": description,
    "examples": examples,
    "theme": "ParityError/Anime",
}
iface = gr.Interface(**interface_options)
iface.launch()