| import gradio as gr |
| import os |
| from huggingface_hub import InferenceClient |
|
|
| |
# Model queried through the HF Inference API (see generate_response()).
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
# Used whenever the caller passes an empty/None system prompt.
DEFAULT_SYSTEM_PROMPT = "You are a helpful AI assistant powered by Mistral."
# Lazily-created huggingface_hub.InferenceClient; populated by init_client().
client = None
|
|
def init_client():
    """Initialize the module-level Hugging Face InferenceClient.

    Reads the HF_TOKEN environment variable. On success, assigns the
    global ``client`` and returns True; otherwise prints a warning and
    returns False without touching ``client``.
    """
    global client
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        # Running without a token: the caller surfaces this in the UI.
        print("Warning: HF_TOKEN not found. Please set it in Space secrets.")
        return False
    client = InferenceClient(token=hf_token)
    print("Inference client initialized successfully")
    return True
|
|
def generate_response(message, history, system_prompt, max_tokens, temperature):
    """Generate a chat completion via the Hugging Face Inference API.

    Args:
        message: The new user message.
        history: List of (user_text, assistant_text) pairs from prior turns.
        system_prompt: System prompt; falls back to DEFAULT_SYSTEM_PROMPT
            when empty/None.
        max_tokens: Maximum tokens to generate (coerced to int).
        temperature: Sampling temperature (coerced to float).

    Returns:
        The assistant's reply text, or an "Error: ..." string on failure.
    """
    global client

    # Lazy initialization in case module import ran without a token.
    if client is None and not init_client():
        return "Error: HF_TOKEN not configured. Please add it in Space settings."

    try:
        # Rebuild the full conversation on every call: system prompt,
        # prior turns, then the new user message.
        messages = [{"role": "system", "content": system_prompt or DEFAULT_SYSTEM_PROMPT}]
        for user_turn, assistant_turn in history:
            if user_turn:
                messages.append({"role": "user", "content": user_turn})
            if assistant_turn:
                messages.append({"role": "assistant", "content": assistant_turn})
        messages.append({"role": "user", "content": message})

        response = client.chat_completion(
            model=MODEL_NAME,
            messages=messages,
            max_tokens=int(max_tokens),
            temperature=float(temperature),
        )
        return response.choices[0].message.content

    except Exception as e:  # surface API errors to the UI instead of crashing
        return f"Error: {str(e)}"
|
|
| |
# Startup banner. Derive the reported model from MODEL_NAME so the log
# cannot drift out of sync with the model actually queried (the previous
# hard-coded "Kimi K2" banner contradicted the Mistral model in use).
print(f"===== {MODEL_NAME.split('/')[-1]} Inference API Chat =====")
print(f"Using Inference API with model: {MODEL_NAME}")

# Eagerly initialize the client so the UI below can warn when HF_TOKEN
# is missing instead of failing on the first message.
client_ready = init_client()
|
|
# Build the chat UI. User-facing text is derived from MODEL_NAME so it
# stays consistent with the backend (the original hard-coded "Kimi-K2"
# while actually serving Mistral), and the previously mojibake-garbled
# emoji labels are restored to proper UTF-8.
with gr.Blocks(title=f"{MODEL_NAME.split('/')[-1]} Chat", theme=gr.themes.Soft()) as iface:
    gr.Markdown(f"""
    # 🤖 {MODEL_NAME.split('/')[-1]} Chat
    **Powered by Hugging Face Inference API**

    This Space talks to `{MODEL_NAME}` via the Inference API for efficient inference.
    """)

    if not client_ready:
        # Shown when init_client() could not find HF_TOKEN at startup.
        gr.Markdown("⚠️ **Warning:** HF_TOKEN not found. Please configure it in Space secrets.")

    chatbot = gr.Chatbot(height=450, label="Chat")

    with gr.Row():
        msg = gr.Textbox(
            placeholder="Type your message here...",
            label="Your Message",
            scale=4,
            lines=2,
        )
        submit_btn = gr.Button("Send 🚀", variant="primary", scale=1)

    with gr.Accordion("⚙️ Settings", open=False):
        system_prompt = gr.Textbox(
            value=DEFAULT_SYSTEM_PROMPT,
            label="System Prompt",
            lines=2,
        )
        with gr.Row():
            max_tokens = gr.Slider(
                minimum=64,
                maximum=2048,
                value=512,
                step=64,
                label="Max Tokens",
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=0.7,
                step=0.1,
                label="Temperature",
            )

    clear_btn = gr.Button("🗑️ Clear Chat")

    def respond(message, history, system_prompt, max_tokens, temperature):
        """Handle one submission: generate a reply, append the turn, clear the box."""
        if not message.strip():
            # Ignore empty or whitespace-only submissions.
            return "", history
        response = generate_response(message, history, system_prompt, max_tokens, temperature)
        history.append((message, response))
        return "", history

    # Enter key and the Send button share the same handler and outputs.
    msg.submit(respond, [msg, chatbot, system_prompt, max_tokens, temperature], [msg, chatbot])
    submit_btn.click(respond, [msg, chatbot, system_prompt, max_tokens, temperature], [msg, chatbot])
    # Reset the chat display to an empty history.
    clear_btn.click(lambda: [], None, chatbot)
|
|
if __name__ == "__main__":
    # Bind on all interfaces at port 7860 (the standard HF Spaces port).
    iface.launch(server_name="0.0.0.0", server_port=7860)