| import gradio as gr |
| from huggingface_hub import InferenceClient |
| import traceback |
|
|
def get_text(content):
    """Extract plain text from a chat-message ``content`` payload.

    Gradio / OpenAI-style message content may be a plain string, a list of
    typed content blocks, or a single dict. Returns the concatenated text.

    Args:
        content: str, list of content blocks (dicts with ``type``/``text``,
            or bare strings), or a dict with a ``text`` key.

    Returns:
        The extracted text; falls back to ``str(content)`` for anything else.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # Accept bare strings inside the list too — the original crashed on
        # them with AttributeError; dict blocks must be type == "text".
        parts = []
        for block in content:
            if isinstance(block, str):
                parts.append(block)
            elif isinstance(block, dict) and block.get("type") == "text":
                parts.append(block.get("text", ""))
        return "".join(parts)
    if isinstance(content, dict):
        # Dict without a "text" key falls back to its repr, as before.
        return content.get("text", str(content))
    return str(content)
|
|
def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken,
):
    """Stream a chat completion from the fine-tuned Llama model.

    Builds a Llama-3 chat-template prompt from the system message, prior
    history, and the new user message, then streams tokens from the HF
    Inference router, yielding the accumulated response after each token.

    Args:
        message: Latest user message (str or content-block payload).
        history: Prior turns as ``{"role": ..., "content": ...}`` dicts.
        system_message: System prompt text.
        max_tokens: Generation cap (``max_new_tokens``).
        temperature: Sampling temperature.
        top_p: Nucleus-sampling cutoff.
        hf_token: OAuth token injected by Gradio's login flow.

    Yields:
        The partial (growing) assistant response, or a user-facing
        warning/error string.
    """
    # Guard: without a logged-in token we cannot call the router at all.
    if not hf_token or not hf_token.token:
        yield "⚠️ Please **Login** in the sidebar to access @frusto360 AI."
        return

    try:
        MODEL_ID = "Frusto/llama-3.2-1b-frusto360-final"
        API_URL = f"https://router.huggingface.co/hf-inference/models/{MODEL_ID}"

        client = InferenceClient(base_url=API_URL, token=hf_token.token)

        # Assemble the raw Llama-3 chat template: system turn, then every
        # historical turn, then the new user turn + assistant header.
        segments = [
            f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>"
        ]
        for turn in history:
            speaker = turn.get("role", "user")
            text = get_text(turn.get("content", ""))
            segments.append(
                f"<|start_header_id|>{speaker}<|end_header_id|>\n\n{text}<|eot_id|>"
            )
        segments.append(
            f"<|start_header_id|>user<|end_header_id|>\n\n{get_text(message)}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
        )
        prompt = "".join(segments)

        # Stream tokens, yielding the accumulated text so the UI updates live.
        accumulated = ""
        stream = client.text_generation(
            prompt,
            max_new_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            stop=["<|eot_id|>"],
        )
        for chunk in stream:
            if isinstance(chunk, str):
                piece = chunk
            else:
                # Some client versions yield token objects instead of strings.
                piece = getattr(chunk, 'token', getattr(chunk, 'text', str(chunk)))
            accumulated += piece
            yield accumulated

    except Exception as e:
        # Top-level UI boundary: surface the failure to the user instead of
        # crashing the Gradio stream.
        yield f"❌ **Router Error:** {str(e)}\n\n*Note: Ensure 'Inference API' is enabled in your model settings.*"
|
|
| |
# Extra generation controls rendered below the chat box; values are passed
# positionally to respond() after (message, history).
_generation_controls = [
    gr.Textbox(value="You are the @frusto360 AI.", label="System message"),
    gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
    gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
]

chatbot_interface = gr.ChatInterface(respond, additional_inputs=_generation_controls)
|
|
# Fix: Blocks.launch() does not accept a `theme` kwarg — the theme must be
# passed to the gr.Blocks(...) constructor instead.
with gr.Blocks(fill_height=True, theme="glass") as demo:
    with gr.Sidebar():
        gr.Markdown("## 🔐 @frusto360 Auth")
        gr.LoginButton()  # enables the gr.OAuthToken injection used by respond()
    chatbot_interface.render()


if __name__ == "__main__":
    demo.launch()