# Hugging Face Space: Zephyr-7B chat demo (Spaces status: Running)
import gradio as gr
from huggingface_hub import InferenceClient

# ---------------- CONFIG ----------------
MODEL_REPO = "HuggingFaceH4/zephyr-7b-beta"
SYSTEM_PROMPT_DEFAULT = "You are Zephyr, a helpful, concise and polite AI assistant."
MAX_NEW_TOKENS_DEFAULT = 512  # cap on tokens generated per reply
TEMP_DEFAULT = 0.7            # sampling temperature
TOP_P_DEFAULT = 0.95          # nucleus-sampling threshold

# Create client (calls Hugging Face Inference API, not local model)
client = InferenceClient(MODEL_REPO)
# ---------------- CHAT FUNCTION ---------------- | |
def stream_response(message, chat_history, system_message, max_tokens, temperature, top_p):
    """Stream an assistant reply for *message*, yielding progressive UI updates.

    Parameters
    ----------
    message : str
        The new user message from the textbox.
    chat_history : list[tuple[str, str]]
        Prior (user, assistant) turns in Gradio's tuple chat format.
    system_message : str
        System prompt prepended to the conversation.
    max_tokens : int
        Maximum number of new tokens to generate.
    temperature : float
        Sampling temperature forwarded to the Inference API.
    top_p : float
        Nucleus-sampling threshold forwarded to the Inference API.

    Yields
    ------
    tuple[str, list[tuple[str, str]]]
        ("", updated_history): the empty string clears the input textbox
        while the chatbot component re-renders with the partial response.
    """
    # Rebuild the whole conversation as OpenAI-style message dicts,
    # skipping any empty halves of a turn.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, bot_msg in chat_history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Some stream chunks carry no text (e.g. role-only deltas) — treat as "".
        response += chunk.choices[0].delta.content or ""
        yield "", chat_history + [(message, response)]
# ---------------- UI ---------------- | |
# ---------------- UI ----------------
with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink")) as demo:
    gr.Markdown(
        """
        # 📱 Zephyr-7B (Hosted on Hugging Face Inference API)
        Optimized for **mobile-friendly chat** ✨
        <span style="opacity:0.7">Powered by HuggingFaceH4/zephyr-7b-beta</span>
        """
    )

    chatbot = gr.Chatbot(
        height=500,
        bubble_full_width=False,
        show_copy_button=True,
        label="Chat",
    )

    with gr.Row():
        msg = gr.Textbox(
            label="💬 Message",
            placeholder="Type your message…",
            scale=6,
        )
        send_btn = gr.Button("🚀", variant="primary", scale=1)
        clear_btn = gr.Button("🧹", scale=1)

    with gr.Accordion("⚙️ Settings", open=False):
        system_prompt = gr.Textbox(
            label="System Prompt",
            value=SYSTEM_PROMPT_DEFAULT,
            lines=3,
        )
        temperature = gr.Slider(0.1, 1.5, value=TEMP_DEFAULT, step=0.1, label="Temperature")
        top_p = gr.Slider(0.1, 1.0, value=TOP_P_DEFAULT, step=0.05, label="Top-p")
        max_tokens = gr.Slider(32, 2048, value=MAX_NEW_TOKENS_DEFAULT, step=16, label="Max new tokens")

    # Events: clicking Send and pressing Enter both stream the response.
    # Shared input/output lists keep the two wirings identical.
    chat_inputs = [msg, chatbot, system_prompt, max_tokens, temperature, top_p]
    chat_outputs = [msg, chatbot]
    send_btn.click(stream_response, chat_inputs, chat_outputs)
    msg.submit(stream_response, chat_inputs, chat_outputs)
    # Clearing just resets the chatbot component; no queueing needed.
    clear_btn.click(lambda: None, None, chatbot, queue=False)
if __name__ == "__main__":
    # Start the Gradio server when run as a script (Spaces invokes this too).
    demo.launch()