# stablemed2 / app.py
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from peft import PeftModel
import gradio as gr
import os
from huggingface_hub import login

# Log in to the Hugging Face Hub using a token from the environment
hf_token = os.environ.get('HUGGINGFACE_TOKEN')
login(hf_token)
# Select the device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the tokenizer, the base model, and the LoRA adapter on top of it
tokenizer = AutoTokenizer.from_pretrained('stabilityai/stablelm-3b-4e1t', token=hf_token, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-3b-4e1t", token=hf_token, trust_remote_code=True)
model = PeftModel.from_pretrained(model, "vaishakgkumar/stablemedv1", token=hf_token)
model.to(device)
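
# Hedged suggestion, not part of the original app: for faster inference the
# LoRA weights could be merged into the base model with PEFT's merge_and_unload():
# model = model.merge_and_unload()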
class ChatBot:
    def __init__(self):
        self.history = []

    def predict(self, user_input, system_prompt="You are an expert analyst and provide assessment:"):
        # Build a single-turn chat prompt from the user input and system prompt
        prompt = [{'role': 'user', 'content': user_input + "\n" + system_prompt + ":"}]
        inputs = tokenizer.apply_chat_template(
            prompt,
            add_generation_prompt=True,
            return_tensors='pt'
        )

        # Generate a response; sampling is enabled so that temperature takes effect
        tokens = model.generate(
            inputs.to(model.device),
            max_new_tokens=250,
            temperature=0.8,
            do_sample=True
        )

        # Decode only the newly generated tokens so the prompt is not echoed back
        response_text = tokenizer.decode(tokens[0][inputs.shape[-1]:], skip_special_tokens=True)

        # Free up memory
        del tokens
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        return response_text
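
# Illustrative direct usage outside the Gradio UI (the prompt text is an example):
#   bot = ChatBot()
#   print(bot.predict("What is the proper treatment for buccal herpes?"))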
bot = ChatBot()

title = "👋🏻Welcome to StableLM MED chat"
description = ""
examples = [["What is the proper treatment for buccal herpes?", "Please provide information on the most effective antiviral medications and home remedies for treating buccal herpes."]]

# The two text inputs map to predict(user_input, system_prompt)
iface = gr.Interface(
    fn=bot.predict,
    title=title,
    description=description,
    examples=examples,
    inputs=["text", "text"],
    outputs="text",
    theme="ParityError/Anime"
)

iface.launch()
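
# When running locally rather than on Hugging Face Spaces, a temporary public
# link can be requested with iface.launch(share=True).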