import gradio as gr
from huggingface_hub import InferenceClient

# Client for the Hugging Face Inference API serving the Arcee-Spark model
client = InferenceClient("arcee-ai/Arcee-Spark")


# Generate a streaming chat response from the model
def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    # Build the conversation in the OpenAI-style messages format
    messages = [{"role": "system", "content": system_message}]

    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})

    # Stream tokens back to the UI as they arrive
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response


# Define the Gradio chat interface; ChatInterface supplies `message` and
# `history`, and the additional inputs map to the remaining parameters.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
    title="Chatbot Demo",
)

# Launch the Gradio app
if __name__ == "__main__":
    demo.launch()