import gradio as gr
from huggingface_hub import InferenceClient
from langdetect import detect  # used below: M2M100 needs an explicit source language
from transformers import pipeline

# ---------------- CONFIG ----------------
MODEL_REPO = "HuggingFaceH4/zephyr-7b-beta"
TRANSLATOR_MODEL = "facebook/m2m100_418M"  # multilingual translator

SYSTEM_PROMPT_DEFAULT = (
    "You are Zephyr, a concise and polite AI assistant. "
    "Always respond in a formal tone and provide only the direct answer unless the user requests more detail."
)

MAX_NEW_TOKENS_DEFAULT = 512  # increased to handle long answers
TEMP_DEFAULT = 0.7
TOP_P_DEFAULT = 0.95
MAX_HISTORY_MESSAGES = 10  # limit chat history to prevent repetition
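# Note: MAX_HISTORY_MESSAGES counts individual messages (user and assistant
# alike), so 10 messages is roughly the last 5 exchanges.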

# Clients
client = InferenceClient(MODEL_REPO)
translator = pipeline("translation", model=TRANSLATOR_MODEL)
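# If the endpoint is gated or rate-limited, InferenceClient also accepts an API
# token (assumption: an HF_TOKEN secret is configured for this Space), e.g.:
#   import os; client = InferenceClient(MODEL_REPO, token=os.environ.get("HF_TOKEN"))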

# ---------------- HELPERS ----------------
def is_translation_request(message: str) -> bool:
    """Heuristic: explicit 'translate' keywords, or a mostly non-ASCII message."""
    triggers = ["translate", "traduce", "ترجم", "traduire", "übersetze"]
    if any(t in message.lower() for t in triggers):
        return True
    non_ascii_ratio = sum(1 for c in message if ord(c) > 127) / max(len(message), 1)
    return non_ascii_ratio > 0.4
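
# Illustrative behavior of the heuristic above:
#   is_translation_request("translate hello to French")  -> True   (keyword hit)
#   is_translation_request("مرحبا بالعالم")              -> True   (non-ASCII ratio > 0.4)
#   is_translation_request("hello there")                -> False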

# ---------------- CHAT FUNCTION ----------------
def stream_response(message, chat_history, system_message, max_tokens, temperature, top_p, response_style):
    # --- Translation handling ---
    if is_translation_request(message):
        try:
            # M2M100 has no "auto" source-language mode, so detect the source
            # language first; langdetect returns ISO 639-1 codes, which M2M100
            # also uses (normalize variants like "zh-cn" to "zh").
            src = detect(message).split("-")[0]
            translated = translator(message, src_lang=src, tgt_lang="en")[0]["translation_text"]
            chat_history.append({"role": "user", "content": message})
            chat_history.append({"role": "assistant", "content": translated})
            yield "", chat_history
            return
        except Exception as e:
            chat_history.append({"role": "user", "content": message})
            chat_history.append({"role": "assistant", "content": f"⚠️ Translation failed: {e}"})
            yield "", chat_history
            return

    # --- Apply response style ---
    if response_style == "No explanation":
        style_prompt = " Only provide the direct answer with no explanation."
    elif response_style == "Short explanation":
        style_prompt = " Provide a concise answer with a one-sentence explanation."
    else:  # Detailed explanation
        style_prompt = " Provide a thorough and detailed answer with reasoning and examples."

    # --- Prepare messages ---
    # Only keep the last N messages to prevent repetition
    truncated_history = chat_history[-MAX_HISTORY_MESSAGES:]
    messages = [{"role": "system", "content": system_message + style_prompt}] + truncated_history
    messages.append({"role": "user", "content": message})

    # Append the user turn and a placeholder for the assistant turn
    chat_history.append({"role": "user", "content": message})
    chat_history.append({"role": "assistant", "content": ""})
| response = "" | |
| # --- Stream response --- | |
| for msg in client.chat_completion( | |
| messages, | |
| max_tokens=max_tokens, | |
| stream=True, | |
| temperature=temperature, | |
| top_p=top_p, | |
| ): | |
| token = msg.choices[0].delta.content or "" | |
| response += token | |
| chat_history[-1]["content"] = response | |
| yield "", chat_history | |
| # Clear input box after streaming | |
| yield "", chat_history | |

# ---------------- UI ----------------
with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink")) as demo:
    gr.Markdown("# 🤖 Zephyr-7B Chat + 🌍 Translator")
    chatbot = gr.Chatbot(type="messages", height=500, show_copy_button=True, label="Chat Assistant")

    with gr.Row():
        msg = gr.Textbox(label="💬 Your Message", placeholder="Type here…", scale=6)
        send_btn = gr.Button("🚀 Send", variant="primary", scale=1)
        clear_btn = gr.Button("🧹 Clear Chat", scale=1)

    with gr.Accordion("⚙️ Advanced Settings", open=False):
        system_prompt = gr.Textbox(label="System Prompt", value=SYSTEM_PROMPT_DEFAULT, lines=3)
        response_style = gr.Dropdown(
            ["No explanation", "Short explanation", "Detailed explanation"],
            value="No explanation",
            label="Response Style",
        )
        temperature = gr.Slider(0.1, 1.5, value=TEMP_DEFAULT, step=0.1, label="Temperature")
        top_p = gr.Slider(0.1, 1.0, value=TOP_P_DEFAULT, step=0.05, label="Top-p")
        max_tokens = gr.Slider(128, 2048, value=MAX_NEW_TOKENS_DEFAULT, step=16, label="Max new tokens")

    # --- Events ---
    send_btn.click(
        stream_response,
        [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style],
        [msg, chatbot],
    )
    msg.submit(
        stream_response,
        [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style],
        [msg, chatbot],
    )
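    # The "messages"-type Chatbot holds a list of {"role", "content"} dicts, so
    # resetting its value to an empty list clears the conversation.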
    clear_btn.click(lambda: [], None, chatbot, queue=False)
| gr.Markdown("---") | |
| gr.Markdown("🔗 Built with ❤️ using [Zephyr-7B](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) & [M2M100](https://huggingface.co/facebook/m2m100_418M).") | |

if __name__ == "__main__":
    demo.launch()
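
# To run outside Spaces (assumption: these cover the imports above; M2M100 also
# needs sentencepiece and a torch backend):
#   pip install gradio huggingface_hub transformers langdetect sentencepiece torch
#   python app.py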