import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
# Load a small conversational model on CPU
MODEL_NAME = "microsoft/DialoGPT-small"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="cpu")
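# Note: DialoGPT-small is a ~117M-parameter GPT-2-style model, small enough
# for CPU-only inference; device_map="cpu" requires the accelerate package.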

def chat(message, history):
    """
    Appends the user message to the conversation history, generates a
    response from the model, and returns an empty string (to clear the
    input box) plus the updated history for the Chatbot and the State.
    """
    if history is None:
        history = []
    # Build the conversation prompt by joining previous (user, bot) turns.
    prompt = ""
    for user_msg, bot_msg in history:
        prompt += f"User: {user_msg}\nBot: {bot_msg}\n"
    prompt += f"User: {message}\nBot: "
    # Encode the prompt and generate a response (cap new tokens for CPU speed).
    input_ids = tokenizer.encode(prompt + tokenizer.eos_token, return_tensors="pt")
    output_ids = model.generate(
        input_ids,
        max_new_tokens=50,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        top_p=0.95,
        top_k=50,
    )
    # Decode only the newly generated tokens.
    response = tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
    # Store the turn as a (user, bot) pair, the format gr.Chatbot expects,
    # and return "" so the textbox is cleared after each submit.
    history.append((message, response))
    return "", history, history

# Build the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# CPU LLM Chat Demo\nThis is a simple chat interface using DialoGPT-small.")
    chatbot = gr.Chatbot()
    message_input = gr.Textbox(placeholder="Type your message here...", show_label=False)
    state = gr.State([])
    # State is listed in the outputs so the updated history persists across
    # turns; chat returns ("", history, history) to match these three outputs.
    message_input.submit(chat, [message_input, state], [message_input, chatbot, state])

demo.launch()
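
# To run this Space locally (assuming gradio, transformers, and torch are
# installed): `python app.py`, then open the URL Gradio prints to the console.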