from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from peft import PeftModel, PeftConfig
import gradio as gr
import os
from huggingface_hub import login


# Authenticate with the Hugging Face Hub using a token from the environment
hf_token = os.environ.get('HUGGINGFACE_TOKEN')
if hf_token:
    login(hf_token)

# Define the device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the base tokenizer and model, then attach the fine-tuned LoRA adapter
tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-3b-4e1t", token=hf_token, trust_remote_code=True)
config = PeftConfig.from_pretrained("vaishakgkumar/stablemedv1")
model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-3b-4e1t", token=hf_token, trust_remote_code=True)
model = PeftModel.from_pretrained(model, "vaishakgkumar/stablemedv1", token=hf_token)

model.to(device)
model.eval()
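
# Optionally, the LoRA adapter could be merged into the base weights for faster
# inference. A minimal sketch using peft's PeftModel.merge_and_unload() — this
# step is optional and not required for the app to work:
#
#   model = model.merge_and_unload()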

class ChatBot:
    def __init__(self):
        # Conversation history (kept for future multi-turn support; unused below)
        self.history = []

    def predict(self, user_input, system_prompt="You are an expert analyst and provide assessment:"):
        # Build a single-turn chat prompt, prepending the system instructions to the user message
        prompt = [{'role': 'user', 'content': system_prompt + "\n" + user_input}]
        # Tokenize via the chat template, appending the assistant generation prompt
        inputs = tokenizer.apply_chat_template(
            prompt,
            add_generation_prompt=True,
            return_tensors='pt'
        )

        # Generate a response (do_sample=True so the temperature setting takes effect;
        # with do_sample=False, temperature would be silently ignored)
        tokens = model.generate(
            inputs.to(model.device),
            max_new_tokens=250,
            temperature=0.8,
            do_sample=True
        )

        # Decode only the newly generated tokens, dropping the prompt and special tokens
        response_text = tokenizer.decode(tokens[0][inputs.shape[-1]:], skip_special_tokens=True)

        # Free up memory
        del tokens
        torch.cuda.empty_cache()

        return response_text

bot = ChatBot()
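
# Quick smoke test of the predict method (a sketch; uncomment to try the model
# without launching the UI):
#
#   print(bot.predict("What is the proper treatment for buccal herpes?"))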

title = "👋🏻Welcome to StableLM MED chat"
description = """
"""
examples = [["What is the proper treatment for buccal herpes?", "Please provide information on the most effective antiviral medications and home remedies for treating buccal herpes."]]

iface = gr.Interface(
    fn=bot.predict,
    title=title,
    description=description,
    examples=examples,
    inputs=["text", "text"],
    outputs="text",
    theme="ParityError/Anime"
)

iface.launch()
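
# A minimal sketch of querying the running app programmatically with the
# gradio_client package (assumes the default local URL; adjust host/port as needed):
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   answer = client.predict(
#       "What is the proper treatment for buccal herpes?",    # user_input
#       "You are an expert analyst and provide assessment:",  # system_prompt
#   )
#   print(answer)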