# MultiModelCoder / app.py
import gradio as gr
import os
from huggingface_hub import InferenceClient
# --- UPDATED: Best Coding Models on Free Tier (2025/2026) ---
AVAILABLE_MODELS = [
"Qwen/Qwen2.5-Coder-32B-Instruct", # SOTA Coding Model (Best overall)
"Qwen/Qwen2.5-72B-Instruct", # Larger General Model (Great at Logic)
"meta-llama/Llama-3.1-8B-Instruct", # Fast & Reliable
"mistralai/Mistral-7B-Instruct-v0.2:featherless-ai", # 12B Model (Very Smart)
"zai-org/GLM-4.7:zai-org", # Extremely Fast
"agentica-org/DeepCoder-14B-Preview:featherless-ai", # Specialized Code Model
"agentica-org/DeepCoder-14B-Preview", # Backup Model
]
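# NOTE (assumption): IDs with a ":provider" suffix (e.g. ":featherless-ai") pin
# the request to that specific Inference Provider, while plain IDs let the
# Hugging Face router choose an available provider automatically.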
def respond(message, history, system_message, temperature, model_id,
            oauth_token: gr.OAuthToken | None = None):
    # --- 1. Authentication Logic ---
    # Prefer the signed-in user's OAuth token (Gradio injects it whenever a
    # parameter is annotated with gr.OAuthToken); otherwise fall back to the
    # Space's HF_TOKEN secret.
    token = oauth_token.token if oauth_token is not None else os.getenv("HF_TOKEN")
    if token is None:
        yield "Error: No authentication token found. Please sign in or add 'HF_TOKEN' to Space Secrets."
        return
# --- 2. Setup Client ---
# The client is created dynamically based on the selected model_id
client = InferenceClient(model_id, token=token)
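    # InferenceClient is a thin wrapper, so constructing a fresh one per request
    # keeps model switching simple; the token authenticates the call to the
    # Hugging Face inference backend.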
# --- 3. Build Messages ---
messages = [{"role": "system", "content": system_message}]
    for msg in history:
        # Forward only the fields the chat API expects; "messages"-format
        # history entries can carry extra Gradio keys such as "metadata".
        messages.append({"role": msg["role"], "content": msg["content"]})
messages.append({"role": "user", "content": message})
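    # Illustrative payload shape sent to chat_completion:
    # [{"role": "system", "content": "You are an expert software engineer..."},
    #  {"role": "user", "content": "Write a binary search in Python"},
    #  {"role": "assistant", "content": "def binary_search(arr, target): ..."},
    #  {"role": "user", "content": "<current message>"}]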
# --- 4. Generate Response ---
try:
stream = client.chat_completion(
messages,
max_tokens=2048,
stream=True,
temperature=temperature,
top_p=0.9
)
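        # Each streamed chunk carries an incremental delta; accumulating the
        # deltas and yielding the growing string lets gr.ChatInterface render
        # the response as it is generated.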
response_text = ""
for chunk in stream:
            # Some providers emit chunks without choices (e.g. keep-alive or
            # usage-only chunks); skip them to avoid an IndexError.
            if not chunk.choices:
                continue
content = chunk.choices[0].delta.content
if content:
response_text += content
yield response_text
except Exception as e:
# Better Error Handling for Model Switching
error_msg = str(e)
if "404" in error_msg or "model_not_supported" in error_msg:
yield f"Error: The model **{model_id}** is currently offline or overloaded. \n\n👉 Please select a different model from the dropdown above."
else:
yield f"Error: {error_msg}"
# --- 5. Build UI ---
with gr.Blocks(fill_height=True) as demo:
with gr.Sidebar():
gr.Markdown("# 🤖 Multi-Model Coding Assistant")
gr.Markdown("Select a model from the **Additional Inputs** menu below to switch between different AI coding experts.")
gr.LoginButton("Sign in")
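        # NOTE: gr.LoginButton only provides a Hugging Face sign-in (and thus an
        # OAuth token for respond()) when the Space enables OAuth via
        # "hf_oauth: true" in its README metadata; otherwise HF_TOKEN is used.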
gr.ChatInterface(
respond,
type="messages",
additional_inputs=[
gr.Textbox(
value="You are an expert software engineer. Provide clean, efficient, and well-commented code.",
label="System Instruction",
lines=2
),
gr.Slider(
minimum=0.1,
maximum=2.0,
value=0.5,
step=0.1,
label="Temperature (Lower = More Precise)"
),
# --- Model Selection Dropdown ---
gr.Dropdown(
choices=AVAILABLE_MODELS,
value=AVAILABLE_MODELS[0], # Default to Qwen 2.5 Coder
label="Select AI Model",
interactive=True
)
]
)
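# Streaming generators rely on Gradio's request queue, which recent Gradio
# versions enable by default, so no explicit demo.queue() call is needed here.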
demo.launch()