MultiModelCoder

Sleeping

App Files Files Community

w1r4 committed 23 days ago

Commit

92a045a

verified ·

1 Parent(s): ff4aed5

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -8

app.py CHANGED Viewed

@@ -1,15 +1,47 @@
 import gradio as gr
 with gr.Blocks(fill_height=True) as demo:  # pre-commit version of the app: sidebar plus a model demo loaded via gr.load
     with gr.Sidebar():
-        gr.Markdown("# Inference Provider")
-        gr.Markdown("This Space showcases the Qwen2.5-Coder-32B-Instruct model.")
-        button = gr.LoginButton("Sign in")  # kept in a variable so it can be handed to gr.load below
-    # Qwen 2.5 Coder is currently the most robust open coding model supported on the API
-    gr.load(
-        "models/Qwen/Qwen2.5-Coder-32B-Instruct",
-        accept_token=button  # the sign-in button is supplied as gr.load's accept_token argument
-    )
 demo.launch()  # runs at module level; there is no __main__ guard

 import gradio as gr
+from huggingface_hub import InferenceClient
+# Repo id of the 32B Coder model (per the author, generally available on the free API); respond() passes it to InferenceClient and the sidebar displays it
+model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"
+def respond(message, history):
+    # Initialize the client inside the function to handle sessions correctly
+    client = InferenceClient(model_id)
+    # Build the message history for the API
+    messages = []
+    for user_msg, bot_msg in history:
+        messages.append({"role": "user", "content": user_msg})
+        messages.append({"role": "assistant", "content": bot_msg})
+    messages.append({"role": "user", "content": message})
+    # Generate the response
+    response_text = ""
+    try:
+        # Stream the response
+        stream = client.chat_completion(
+            messages,
+            max_tokens=2048,
+            stream=True,
+            temperature=0.7
+        )
+        for chunk in stream:
+            content = chunk.choices[0].delta.content
+            if content:
+                response_text += content
+                yield response_text
+    except Exception as e:
+        yield f"Error: {str(e)}. The model might be busy or too large for the current free tier."
+# Build the UI: a sidebar with model info and a sign-in button, plus a chat interface driven by respond()
 with gr.Blocks(fill_height=True) as demo:
     with gr.Sidebar():
+        gr.Markdown("# AI Coding Assistant")
+        gr.Markdown(f"Running **{model_id}**")
+        gr.Markdown("If you see an error, the free API might be overloaded. Try again in a minute.")
+        gr.LoginButton("Sign in")  # NOTE(review): the button is not stored and respond() builds its client without a token — confirm the login is meant to be unused
+    gr.ChatInterface(respond)  # respond is a generator that yields the reply text accumulated so far
 demo.launch()  # runs at module level; there is no __main__ guard