# import gradio as gr # from huggingface_hub import InferenceClient # """ # For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference # """ # client = InferenceClient("HuggingFaceH4/zephyr-7b-beta") # def respond( # message, # history: list[tuple[str, str]], # system_message, # max_tokens, # temperature, # top_p, # ): # messages = [{"role": "system", "content": system_message}] # for val in history: # if val[0]: # messages.append({"role": "user", "content": val[0]}) # if val[1]: # messages.append({"role": "assistant", "content": val[1]}) # messages.append({"role": "user", "content": message}) # response = "" # for message in client.chat_completion( # messages, # max_tokens=max_tokens, # stream=True, # temperature=temperature, # top_p=top_p, # ): # token = message.choices[0].delta.content # response += token # yield response # """ # For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface # """ # demo = gr.ChatInterface( # respond, # additional_inputs=[ # gr.Textbox(value="You are a friendly Chatbot.", label="System message"), # gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"), # gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"), # gr.Slider( # minimum=0.1, # maximum=1.0, # value=0.95, # step=0.05, # label="Top-p (nucleus sampling)", # ), # ], # ) # if __name__ == "__main__": # demo.launch() import gradio as gr from huggingface_hub import InferenceClient import time import random from datetime import datetime # Theme and styling constants THEME = gr.themes.Soft( primary_hue="indigo", secondary_hue="blue", neutral_hue="slate", radius_size=gr.themes.sizes.radius_sm, font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"], ) # Configuration MODEL_ID = "HuggingFaceH4/zephyr-7b-beta" DEFAULT_SYSTEM_MSG = "You are a helpful, friendly, and knowledgeable AI assistant." # Initialize the client client = InferenceClient(MODEL_ID) def format_history(history): """Helper function to format chat history for display""" formatted = [] for user_msg, ai_msg in history: if user_msg: formatted.append({"role": "user", "content": user_msg}) if ai_msg: formatted.append({"role": "assistant", "content": ai_msg}) return formatted def respond( message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p, model_id, typing_animation=True ): """Generate response from the model with typing animation effect""" # Format messages for the API messages = [{"role": "system", "content": system_message}] for val in history: if val[0]: messages.append({"role": "user", "content": val[0]}) if val[1]: messages.append({"role": "assistant", "content": val[1]}) messages.append({"role": "user", "content": message}) # Use the selected model inference_client = InferenceClient(model_id) # Generate response with typing animation response = "" for message in inference_client.chat_completion( messages, max_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p, ): token = message.choices[0].delta.content if token: response += token # If typing animation is enabled, add a small random delay if typing_animation: time.sleep(random.uniform(0.01, 0.03)) yield response def create_interface(): """Create and configure the Gradio interface""" # Available models dropdown models = [ "HuggingFaceH4/zephyr-7b-beta", "mistralai/Mistral-7B-Instruct-v0.2", "meta-llama/Llama-2-7b-chat-hf", "gpt2" # Fallback for quick testing ] # Custom CSS for better styling css = """ .gradio-container { min-height: 100vh; } .message-bubble { padding: 10px 15px; border-radius: 12px; margin-bottom: 8px; } .user-bubble { background-color: #e9f5ff; margin-left: 20px; } .bot-bubble { background-color: #f0f4f9; margin-right: 20px; } .timestamp { font-size: 0.7em; color: #888; margin-top: 2px; } """ with gr.Blocks(theme=THEME, css=css) as demo: gr.Markdown("# 🤖 Enhanced AI Chat Interface") gr.Markdown("Chat with state-of-the-art language models from Hugging Face") with gr.Row(): with gr.Column(scale=3): chatbot = gr.Chatbot( label="Conversation", bubble_full_width=False, height=600, avatar_images=("👤", "🤖"), show_copy_button=True ) with gr.Row(): msg = gr.Textbox( placeholder="Type your message here...", show_label=False, container=False, scale=9 ) submit_btn = gr.Button("Send", variant="primary", scale=1) with gr.Accordion("Conversation Summary", open=False): summary = gr.Textbox(label="Key points from this conversation", lines=3, interactive=False) summary_btn = gr.Button("Generate Summary", variant="secondary") with gr.Column(scale=1): with gr.Accordion("Model Settings", open=True): model_selection = gr.Dropdown( models, value=MODEL_ID, label="Select Model", info="Choose which AI model to chat with" ) system_msg = gr.Textbox( value=DEFAULT_SYSTEM_MSG, label="System Message", info="Instructions that define how the AI behaves", lines=3 ) max_tokens = gr.Slider( minimum=1, maximum=2048, value=512, step=1, label="Max New Tokens", info="Maximum length of generated response" ) with gr.Row(): with gr.Column(): temperature = gr.Slider( minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature", info="Higher = more creative, Lower = more focused" ) with gr.Column(): top_p = gr.Slider( minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p", info="Controls randomness in token selection" ) typing_effect = gr.Checkbox( label="Enable Typing Animation", value=True, info="Show realistic typing animation" ) with gr.Accordion("Tools", open=False): clear_btn = gr.Button("Clear Conversation", variant="secondary") export_btn = gr.Button("Export Chat History", variant="secondary") chat_download = gr.File(label="Download", interactive=False, visible=False) # Event handlers msg_submit = msg.submit( fn=respond, inputs=[msg, chatbot, system_msg, max_tokens, temperature, top_p, model_selection, typing_effect], outputs=[chatbot], queue=True ) submit_click = submit_btn.click( fn=respond, inputs=[msg, chatbot, system_msg, max_tokens, temperature, top_p, model_selection, typing_effect], outputs=[chatbot], queue=True ) # Clear the input field after sending msg_submit.then(lambda: "", None, msg) submit_click.then(lambda: "", None, msg) # Clear chat history def clear_history(): return None clear_btn.click( fn=clear_history, inputs=[], outputs=[chatbot] ) # Export chat history def export_history(history): if not history: return None timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") filename = f"chat_history_{timestamp}.txt" with open(filename, "w") as f: f.write("# Chat History\n\n") f.write(f"Exported on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n") for user_msg, ai_msg in history: f.write(f"## User\n{user_msg}\n\n") f.write(f"## AI\n{ai_msg}\n\n") f.write("---\n\n") return filename export_btn.click( fn=export_history, inputs=[chatbot], outputs=[chat_download], queue=False ).then( lambda: gr.update(visible=True), None, [chat_download] ) # Generate conversation summary (simplified implementation) def generate_summary(history): if not history or len(history) < 2: return "Not enough conversation to summarize yet." # In a real application, you might want to send this to the model # Here we're just creating a simple summary topics = [] for user_msg, _ in history: if user_msg and len(user_msg.split()) > 3: # Simple heuristic topics.append(user_msg.split()[0:3]) if topics: return f"This conversation covered {len(history)} exchanges about various topics." else: return "Brief conversation with no clear topics." summary_btn.click( fn=generate_summary, inputs=[chatbot], outputs=[summary] ) return demo # Create and launch the interface demo = create_interface() if __name__ == "__main__": demo.launch(share=False, debug=False)