# Hugging Face Space: Zephyr-7B chat demo (Spaces status: Running)
import gradio as gr
from huggingface_hub import InferenceClient

# ---------------- CONFIG ----------------
MODEL_REPO = "HuggingFaceH4/zephyr-7b-beta"
SYSTEM_PROMPT_DEFAULT = "You are Zephyr, a helpful, concise and polite AI assistant."
MAX_NEW_TOKENS_DEFAULT = 512  # cap on tokens generated per reply
TEMP_DEFAULT = 0.7            # sampling temperature
TOP_P_DEFAULT = 0.95          # nucleus-sampling threshold

# Create client (calls Hugging Face Inference API, not local model)
client = InferenceClient(MODEL_REPO)
# ---------------- CHAT FUNCTION ---------------- | |
def stream_response(message, chat_history, system_message, max_tokens, temperature, top_p):
    """Stream an assistant reply for *message*, yielding progressive UI updates.

    Parameters
    ----------
    message : str
        The new user message from the textbox.
    chat_history : list[tuple[str, str]]
        Prior (user, assistant) turns in Gradio's tuple chat format.
    system_message : str
        System prompt prepended to the conversation.
    max_tokens : int
        Maximum number of new tokens to generate.
    temperature : float
        Sampling temperature forwarded to the Inference API.
    top_p : float
        Nucleus-sampling threshold forwarded to the Inference API.

    Yields
    ------
    tuple[str, list[tuple[str, str]]]
        ("", updated_history): the empty string clears the input textbox
        while the chatbot component re-renders with the partial response.
    """
    # Rebuild the whole conversation as OpenAI-style message dicts,
    # skipping any empty halves of a turn.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, bot_msg in chat_history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Some stream chunks carry no text (e.g. role-only deltas) — treat as "".
        response += chunk.choices[0].delta.content or ""
        yield "", chat_history + [(message, response)]
# ---------------- UI ---------------- | |
# ---------------- UI ----------------
with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink")) as demo:
    gr.Markdown(
        """
        # 📱 Zephyr-7B (Hosted on Hugging Face Inference API)
        Optimized for **mobile-friendly chat** ✨
        <span style="opacity:0.7">Powered by HuggingFaceH4/zephyr-7b-beta</span>
        """
    )

    chatbot = gr.Chatbot(
        height=500,
        bubble_full_width=False,
        show_copy_button=True,
        label="Chat",
    )

    with gr.Row():
        msg = gr.Textbox(
            label="💬 Message",
            placeholder="Type your message…",
            scale=6,
        )
        send_btn = gr.Button("🚀", variant="primary", scale=1)
        clear_btn = gr.Button("🧹", scale=1)

    with gr.Accordion("⚙️ Settings", open=False):
        system_prompt = gr.Textbox(
            label="System Prompt",
            value=SYSTEM_PROMPT_DEFAULT,
            lines=3,
        )
        temperature = gr.Slider(0.1, 1.5, value=TEMP_DEFAULT, step=0.1, label="Temperature")
        top_p = gr.Slider(0.1, 1.0, value=TOP_P_DEFAULT, step=0.05, label="Top-p")
        max_tokens = gr.Slider(32, 2048, value=MAX_NEW_TOKENS_DEFAULT, step=16, label="Max new tokens")

    # Events: clicking Send and pressing Enter both stream the response.
    # Shared input/output lists keep the two wirings identical.
    chat_inputs = [msg, chatbot, system_prompt, max_tokens, temperature, top_p]
    chat_outputs = [msg, chatbot]
    send_btn.click(stream_response, chat_inputs, chat_outputs)
    msg.submit(stream_response, chat_inputs, chat_outputs)
    # Clearing just resets the chatbot component; no queueing needed.
    clear_btn.click(lambda: None, None, chatbot, queue=False)
if __name__ == "__main__":
    # Start the Gradio server when run as a script (Spaces invokes this too).
    demo.launch()