import os

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig
from huggingface_hub import login

# Log in with the HF token stored in the Space's secrets
hf_token = os.environ.get('HUGGINGFACE_TOKEN')
login(hf_token)

# Define the device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the base tokenizer and model, then attach the fine-tuned PEFT adapter
tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-3b-4e1t")
config = PeftConfig.from_pretrained("vaishakgkumar/stablemedv1")
model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-3b-4e1t")
model = PeftModel.from_pretrained(model, "vaishakgkumar/stablemedv1")
model.to(device)
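
# ChatBot wraps the PEFT-adapted model: it builds a chat-formatted prompt,
# generates a reply, and frees GPU memory after each request.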
class ChatBot:
    def __init__(self):
        self.history = []

    def predict(self, user_input, system_prompt="You are an expert analyst and provide assessment:"):
        # Build the chat prompt: system instructions first, then the user's question
        prompt = [{'role': 'user', 'content': system_prompt + "\n" + user_input}]
        inputs = tokenizer.apply_chat_template(
            prompt,
            add_generation_prompt=True,
            return_tensors='pt'
        )

        # Generate a response (sampling enabled so the temperature setting takes effect)
        tokens = model.generate(
            inputs.to(model.device),
            max_new_tokens=250,
            temperature=0.8,
            do_sample=True
        )

        # Decode only the newly generated tokens, dropping the prompt and special tokens
        response_text = tokenizer.decode(tokens[0][inputs.shape[-1]:], skip_special_tokens=True)

        # Free up memory
        del tokens
        torch.cuda.empty_cache()

        return response_text

bot = ChatBot()

title = "👋🏻 Welcome to StableLM MED chat"
description = ""
examples = [["What is the proper treatment for buccal herpes?", "Please provide information on the most effective antiviral medications and home remedies for treating buccal herpes."]]
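
# Gradio UI: two text inputs (the user's question and the system prompt) wired to
# ChatBot.predict, returning a single text response.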
iface = gr.Interface(
    fn=bot.predict,
    title=title,
    description=description,
    examples=examples,
    inputs=["text", "text"],
    outputs="text",
    theme="ParityError/Anime"
)

iface.launch()