w1r4 committed on
Commit
92a045a
·
verified ·
1 Parent(s): ff4aed5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -8
app.py CHANGED
@@ -1,15 +1,47 @@
1
  import gradio as gr
 
2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  with gr.Blocks(fill_height=True) as demo:
4
  with gr.Sidebar():
5
- gr.Markdown("# Inference Provider")
6
- gr.Markdown("This Space showcases the Qwen2.5-Coder-32B-Instruct model.")
7
- button = gr.LoginButton("Sign in")
 
8
 
9
- # Qwen 2.5 Coder is currently the most robust open coding model supported on the API
10
- gr.load(
11
- "models/Qwen/Qwen2.5-Coder-32B-Instruct",
12
- accept_token=button
13
- )
14
 
15
  demo.launch()
 
1
  import gradio as gr
2
+ from huggingface_hub import InferenceClient
3
 
4
+ # Use the 32B Coder model, which is generally available on the free Inference API
5
+ model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"
6
+
7
def _build_messages(message, history):
    """Convert chat history plus the new user message into role/content dicts.

    Accepts history either as ``(user, assistant)`` pairs or as dicts that
    already carry ``role`` and ``content`` keys.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Dict-style history: forward only role/content. Unpacking such a
            # dict as a pair would yield its *keys* ("role", "content") and
            # silently send those strings as message text.
            role = turn.get("role")
            content = turn.get("content")
            if role and content is not None:
                messages.append({"role": role, "content": content})
            continue

        user_msg, bot_msg = turn
        # Either side of a pair may be missing (e.g. a turn with no reply yet).
        if user_msg is not None:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg is not None:
            messages.append({"role": "assistant", "content": bot_msg})

    messages.append({"role": "user", "content": message})
    return messages


def respond(message, history):
    """Stream an assistant reply to ``message``.

    Args:
        message: The newest user message.
        history: Earlier conversation turns, either ``(user, assistant)``
            pairs or ``{"role": ..., "content": ...}`` dicts.

    Yields:
        The reply text accumulated so far, once per received content chunk.
        If the request fails, a final string containing an error notice is
        yielded; any text already received is kept in front of the notice.
    """
    messages = _build_messages(message, history)

    response_text = ""
    try:
        # The client is created per call rather than at import time. It lives
        # inside the ``try`` so a construction failure is also reported in the
        # chat instead of escaping as an unhandled exception.
        # NOTE(review): no access token is passed here, so the "Sign in"
        # button elsewhere in this file does not affect these requests.
        # Confirm whether the user's token should be forwarded.
        client = InferenceClient(model_id)

        stream = client.chat_completion(
            messages,
            max_tokens=2048,
            stream=True,
            temperature=0.7,
        )
        for chunk in stream:
            # A chunk may arrive with an empty ``choices`` list (presumably a
            # trailing metadata-only chunk on some backends); indexing it
            # would raise IndexError and replace a good answer with an error.
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content:
                response_text += content
                yield response_text
    except Exception as e:
        error_text = (
            f"Error: {e}. The model might be busy or too large for the "
            "current free tier."
        )
        # Each yield replaces the displayed message, so keep the partial reply.
        yield f"{response_text}\n\n{error_text}" if response_text else error_text
36
+
37
+ # Build the UI
38
# Page layout: a sidebar with status text and a sign-in button, plus a chat
# panel driven by ``respond``.
with gr.Blocks(fill_height=True) as demo:
    with gr.Sidebar():
        gr.Markdown("# AI Coding Assistant")
        gr.Markdown(f"Running **{model_id}**")
        gr.Markdown("If you see an error, the free API might be overloaded. Try again in a minute.")
        gr.LoginButton("Sign in")

    gr.ChatInterface(respond)

# Launch only when run as a script, so importing this module (for tests or
# tooling) does not start a server as a side effect.
if __name__ == "__main__":
    demo.launch()