from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from peft import PeftModel, PeftConfig
import gradio as gr
import os
from huggingface_hub import login


# Authenticate with the Hugging Face Hub using a token from the environment
hf_token = os.environ.get('HUGGINGFACE_TOKEN')
if hf_token:
    login(hf_token)

# Define the device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the base tokenizer and model, then attach the fine-tuned LoRA adapter
tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-3b-4e1t", token=hf_token, trust_remote_code=True)
config = PeftConfig.from_pretrained("vaishakgkumar/stablemedv1")
model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-3b-4e1t", token=hf_token, trust_remote_code=True)
model = PeftModel.from_pretrained(model, "vaishakgkumar/stablemedv1", token=hf_token)

model.to(device)
model.eval()
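
# Optionally, the LoRA adapter could be merged into the base weights for faster
# inference. A minimal sketch using peft's PeftModel.merge_and_unload() — this
# step is optional and not required for the app to work:
#
#   model = model.merge_and_unload()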

class ChatBot:
    def __init__(self):
        # Conversation history (kept for future multi-turn support; unused below)
        self.history = []

    def predict(self, user_input, system_prompt="You are an expert analyst and provide assessment:"):
        # Build a single-turn chat prompt, prepending the system instructions to the user message
        prompt = [{'role': 'user', 'content': system_prompt + "\n" + user_input}]
        # Tokenize via the chat template, appending the assistant generation prompt
        inputs = tokenizer.apply_chat_template(
            prompt,
            add_generation_prompt=True,
            return_tensors='pt'
        )

        # Generate a response (do_sample=True so the temperature setting takes effect;
        # with do_sample=False, temperature would be silently ignored)
        tokens = model.generate(
            inputs.to(model.device),
            max_new_tokens=250,
            temperature=0.8,
            do_sample=True
        )

        # Decode only the newly generated tokens, dropping the prompt and special tokens
        response_text = tokenizer.decode(tokens[0][inputs.shape[-1]:], skip_special_tokens=True)

        # Free up memory
        del tokens
        torch.cuda.empty_cache()

        return response_text

bot = ChatBot()
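
# Quick smoke test of the predict method (a sketch; uncomment to try the model
# without launching the UI):
#
#   print(bot.predict("What is the proper treatment for buccal herpes?"))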

title = "👋🏻Welcome to StableLM MED chat"
description = """
"""
examples = [["What is the proper treatment for buccal herpes?", "Please provide information on the most effective antiviral medications and home remedies for treating buccal herpes."]]

iface = gr.Interface(
    fn=bot.predict,
    title=title,
    description=description,
    examples=examples,
    inputs=["text", "text"],
    outputs="text",
    theme="ParityError/Anime"
)

iface.launch()
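
# A minimal sketch of querying the running app programmatically with the
# gradio_client package (assumes the default local URL; adjust host/port as needed):
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   answer = client.predict(
#       "What is the proper treatment for buccal herpes?",    # user_input
#       "You are an expert analyst and provide assessment:",  # system_prompt
#   )
#   print(answer)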