# stablemed2 / app.py
# vaishakgkumar's picture
# Update app.py
# 989f6d4
from transformers import AutoTokenizer, MistralForCausalLM
import torch
import gradio as gr
import random
from textwrap import wrap
from transformers import AutoConfig, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, MistralForCausalLM
from peft import PeftModel, PeftConfig
import torch
import gradio as gr
import os
import huggingface
from huggingface_hub import login
# Read the Hub access token from the environment; it is None when the
# HUGGINGFACE_TOKEN variable is not set.
hf_token = os.environ.get('HUGGINGFACE_TOKEN')

# Only authenticate when a token is actually configured. Calling login() with
# no token makes huggingface_hub fall back to an interactive prompt, which
# cannot be answered when the app runs unattended.
if hf_token:
    login(token=hf_token)
# Functions to Wrap the Prompt Correctly
def wrap_text(text, width=90):
    """Wrap every line of *text* to at most *width* columns.

    Existing line breaks are preserved: the text is split on newlines, each
    resulting line is wrapped on its own, and the pieces are joined back with
    newlines.

    Args:
        text: The string to wrap.
        width: Maximum line length handed to ``textwrap.fill``.

    Returns:
        The wrapped text as a single string.
    """
    # Imported here because the module only does `from textwrap import wrap`,
    # which leaves the name `textwrap` itself unbound.
    import textwrap

    return "\n".join(textwrap.fill(line, width=width) for line in text.split("\n"))
def multimodal_prompt(user_input, system_prompt="You are an expert medical analyst:", max_length=512):
    """Generate a completion for *user_input* followed by *system_prompt*.

    The two strings are concatenated (user input first, with no separator),
    tokenized with the module-level ``tokenizer`` and passed to the
    module-level ``peft_model`` for sampling-based generation.

    Args:
        user_input: Text supplied by the user.
        system_prompt: Instruction text appended directly after ``user_input``.
        max_length: Value forwarded as ``max_length`` to ``generate``. The
            original code read this from an undefined global, so it is now an
            explicit parameter with a default.

    Returns:
        The decoded text of the first generated sequence, with special tokens
        stripped.
    """
    # Combine user input and system prompt
    formatted_input = f"{user_input}{system_prompt}"

    # Encode the input text and place it on the same device as the model, so
    # the tensors match wherever the model was loaded.
    encodeds = tokenizer(formatted_input, return_tensors="pt", add_special_tokens=False)
    model_inputs = encodeds.to(peft_model.device)

    # Generate a response using the model. The end-of-sequence token is reused
    # as the padding token.
    output = peft_model.generate(
        **model_inputs,
        max_length=max_length,
        use_cache=True,
        early_stopping=True,
        bos_token_id=peft_model.config.bos_token_id,
        eos_token_id=peft_model.config.eos_token_id,
        pad_token_id=peft_model.config.eos_token_id,
        temperature=0.1,
        do_sample=True,
    )

    # Decode the response
    response_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return response_text
# Define the device: use the GPU when torch reports one, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Hub IDs of the base model and of the PEFT adapter applied on top of it.
base_model_id = "stabilityai/stablelm-3b-4e1t"
model_directory = "vaishakgkumar/stablemedv1"

# Instantiate the tokenizer from the base model.
tokenizer = AutoTokenizer.from_pretrained(base_model_id, token=hf_token, trust_remote_code=True, padding_side="left")
# tokenizer = AutoTokenizer.from_pretrained("vaishakgkumar/stablemedv3", trust_remote_code=True, padding_side="left")
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'left'

# Load the PEFT model: adapter config, then the base model, then the adapter
# weights wrapped around the base model.
peft_config = PeftConfig.from_pretrained(model_directory, token=hf_token)
peft_model = AutoModelForCausalLM.from_pretrained(base_model_id, token=hf_token, trust_remote_code=True)
peft_model = PeftModel.from_pretrained(peft_model, model_directory, token=hf_token)

# Move the model to the selected device; previously `device` was computed but
# never applied to the model.
peft_model = peft_model.to(device)
class ChatBot:
    """Thin wrapper that turns a user message into a model response.

    Relies on the module-level ``tokenizer`` and ``peft_model``.
    """

    def __init__(self):
        # Initialised empty; predict() neither reads nor updates it.
        self.history = []

    def predict(self, user_input, system_prompt="You are an expert analyst and provide assessment:"):
        """Generate a response for a single user message.

        Args:
            user_input: Text entered by the user.
            system_prompt: Instruction appended after the user text on a new
                line (a trailing ":" is added to it).

        Returns:
            The decoded first output sequence, special tokens included.
        """
        prompt = [{'role': 'user', 'content': user_input + "\n" + system_prompt + ":"}]
        inputs = tokenizer.apply_chat_template(
            prompt,
            add_generation_prompt=True,
            return_tensors='pt'
        )

        # Generate a response using the model. Inputs go to the device the
        # model lives on. Decoding is greedy (do_sample=False), so no
        # temperature is passed: it would be ignored.
        # NOTE(review): input_ids is passed by keyword because some peft
        # releases appear to define generate() with keyword arguments only.
        tokens = peft_model.generate(
            input_ids=inputs.to(peft_model.device),
            max_new_tokens=512,
            do_sample=False
        )

        # Decode the response
        response_text = tokenizer.decode(tokens[0], skip_special_tokens=False)

        # Free up memory
        del tokens
        torch.cuda.empty_cache()

        return response_text
# Single chatbot instance whose predict method backs the UI.
bot = ChatBot()

# Text shown at the top of the page.
title = "StableDoc Chat"
description = "\nYou can use this Space to test out the current model vaishakgkumar/stablemedv3.\n"

# One text box in, one text box out. Only the user message is collected, so
# predict() runs with its default system prompt.
iface = gr.Interface(
    fn=bot.predict,
    inputs=["text"],
    outputs="text",
    title=title,
    description=description,
    theme="ParityError/Anime",
)

# Start serving the interface.
iface.launch()