# sakurasaniya12345's picture
# Update app.py
# 2270427 verified
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# Hugging Face Hub id used for both the model and the tokenizer, kept in one
# place so the two loads cannot drift apart.
MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.1"

# Load the model with 4-bit quantization. The quantization settings go through
# an explicit BitsAndBytesConfig because passing `load_in_4bit=True` straight
# to `from_pretrained` is deprecated. The 4-bit compute dtype is set to float16
# to match `torch_dtype`; its documented default is float32.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    torch_dtype=torch.float16,
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
    ),
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
def _history_to_messages(history, message):
    """Return the conversation as role/content dicts, ending with *message*."""
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # "messages" format: one {"role": ..., "content": ...} dict per
            # turn. Copy only the two keys the chat template needs.
            messages.append({"role": turn["role"], "content": turn["content"]})
            continue
        # Legacy "tuples" format: one (user_msg, bot_msg) pair per exchange.
        user_msg, bot_msg = turn
        messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    return messages


def respond(message: str, history: list) -> str:
    """Generate the assistant's reply to *message* given the chat *history*.

    Args:
        message: The newest user message.
        history: Earlier turns, either as role/content dicts ("messages"
            format) or as (user_msg, bot_msg) pairs ("tuples" format).

    Returns:
        The newly generated text only, without the prompt that produced it.
    """
    messages = _history_to_messages(history, message)

    # return_dict=True yields input_ids together with the attention mask, so
    # both can be forwarded to generate(). The tensors are moved to wherever
    # the model was placed instead of assuming "cuda".
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)

    outputs = model.generate(**inputs, max_new_tokens=200)

    # generate() returns prompt + continuation; drop the prompt tokens so the
    # reply does not echo the whole conversation back to the user.
    prompt_len = inputs["input_ids"].shape[-1]
    reply = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    return reply.strip()
# Build the chat UI: a Chatbot component using the "messages" format, handed
# to a ChatInterface that uses `respond` as its handler function.
chat_display = gr.Chatbot(type="messages")
demo = gr.ChatInterface(
    fn=respond,
    chatbot=chat_display,
    title="Mistral 7B Chatbot",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()