# Hugging Face Space -- status when this page was captured: Runtime error
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# Hub checkpoint shared by the model and its tokenizer so the two cannot drift apart.
MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.1"

# Load the model with 4-bit quantization. The flag is passed through
# `quantization_config` because handing `load_in_4bit=True` directly to
# `from_pretrained` is deprecated in transformers.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    torch_dtype=torch.float16,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
def respond(message, history):
    """Generate the assistant's reply to ``message`` given the chat ``history``.

    Args:
        message: The latest user message as plain text.
        history: Earlier conversation turns. Two layouts are accepted:
            a list of ``{"role": ..., "content": ...}`` dicts (what a
            ``type="messages"`` chatbot supplies) or a list of
            ``(user, assistant)`` pairs (the legacy "tuples" layout).

    Returns:
        Only the newly generated assistant text; the prompt is not echoed back.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Keep just role/content: any extra keys on the history entry are
            # not part of the chat-template input.
            content = turn.get("content")
            # NOTE(review): non-text content (e.g. file attachments) is skipped
            # here; confirm that is acceptable for this Space.
            if isinstance(content, str) and content:
                messages.append({"role": turn["role"], "content": content})
        else:
            user_msg, bot_msg = turn
            messages.append({"role": "user", "content": user_msg})
            if bot_msg:
                messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    # Tokenize with the model's chat template. `return_dict=True` also yields
    # the attention mask, and the tensors are moved to wherever
    # `device_map="auto"` placed the model instead of assuming "cuda".
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)
    output_ids = model.generate(**inputs, max_new_tokens=200)

    # `generate` returns prompt + continuation; slice off the prompt so the
    # user only sees the new reply.
    prompt_length = inputs["input_ids"].shape[-1]
    reply_ids = output_ids[0][prompt_length:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True).strip()
# Build the chat UI. `type="messages"` is set on both the interface and the
# chatbot so the history handed to the callback uses the same role/content
# dict layout that the Chatbot component displays.
demo = gr.ChatInterface(
    respond,
    type="messages",
    chatbot=gr.Chatbot(type="messages"),
    title="Mistral 7B Chatbot",
)

if __name__ == "__main__":
    demo.launch()