Spaces:
Running
Running
swapping Claude for Qwen-30B-A3B on HF inference
Browse files
app.py
CHANGED
@@ -1,3 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import asyncio
|
2 |
import os
|
3 |
import json
|
@@ -8,231 +13,193 @@ import gradio as gr
|
|
8 |
from gradio.components.chatbot import ChatMessage
|
9 |
from mcp import ClientSession, StdioServerParameters
|
10 |
from mcp.client.stdio import stdio_client
|
11 |
-
from anthropic import Anthropic
|
12 |
from dotenv import load_dotenv
|
|
|
13 |
|
14 |
load_dotenv()
|
15 |
|
16 |
loop = asyncio.new_event_loop()
|
17 |
asyncio.set_event_loop(loop)
|
18 |
|
|
|
19 |
class MCPClientWrapper:
|
|
|
|
|
|
|
|
|
20 |
def __init__(self):
|
21 |
self.session = None
|
22 |
self.exit_stack = None
|
23 |
-
self.
|
24 |
-
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
def connect(self, server_path: str) -> str:
|
27 |
return loop.run_until_complete(self._connect(server_path))
|
28 |
-
|
29 |
async def _connect(self, server_path: str) -> str:
|
30 |
if self.exit_stack:
|
31 |
await self.exit_stack.aclose()
|
32 |
-
|
33 |
self.exit_stack = AsyncExitStack()
|
34 |
-
|
35 |
-
is_python = server_path.endswith(
|
36 |
command = "python" if is_python else "node"
|
37 |
-
|
38 |
server_params = StdioServerParameters(
|
39 |
command=command,
|
40 |
args=[server_path],
|
41 |
-
env={"PYTHONIOENCODING": "utf-8", "PYTHONUNBUFFERED": "1"}
|
|
|
|
|
|
|
|
|
42 |
)
|
43 |
-
|
44 |
-
stdio_transport = await self.exit_stack.enter_async_context(stdio_client(server_params))
|
45 |
self.stdio, self.write = stdio_transport
|
46 |
-
|
47 |
-
self.session = await self.exit_stack.enter_async_context(
|
|
|
|
|
48 |
await self.session.initialize()
|
49 |
-
|
50 |
response = await self.session.list_tools()
|
51 |
-
self.tools = [
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
|
|
|
|
|
|
57 |
tool_names = [tool["name"] for tool in self.tools]
|
58 |
return f"Connected to MCP server. Available tools: {', '.join(tool_names)}"
|
59 |
-
|
60 |
-
|
|
|
|
|
|
|
61 |
if not self.session:
|
62 |
-
return
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
new_messages = loop.run_until_complete(self._process_query(message, history))
|
68 |
-
return
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
else:
|
76 |
-
role, content =
|
77 |
-
|
78 |
-
if role in
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
)
|
89 |
|
90 |
-
|
91 |
-
|
92 |
-
for content in response.content:
|
93 |
-
if content.type == 'text':
|
94 |
-
result_messages.append({
|
95 |
-
"role": "assistant",
|
96 |
-
"content": content.text
|
97 |
-
})
|
98 |
-
|
99 |
-
elif content.type == 'tool_use':
|
100 |
-
tool_name = content.name
|
101 |
-
tool_args = content.input
|
102 |
-
|
103 |
-
result_messages.append({
|
104 |
-
"role": "assistant",
|
105 |
-
"content": f"I'll use the {tool_name} tool to help answer your question.",
|
106 |
-
"metadata": {
|
107 |
-
"title": f"Using tool: {tool_name}",
|
108 |
-
"log": f"Parameters: {json.dumps(tool_args, ensure_ascii=True)}",
|
109 |
-
"status": "pending",
|
110 |
-
"id": f"tool_call_{tool_name}"
|
111 |
-
}
|
112 |
-
})
|
113 |
-
|
114 |
-
result_messages.append({
|
115 |
-
"role": "assistant",
|
116 |
-
"content": "```json\n" + json.dumps(tool_args, indent=2, ensure_ascii=True) + "\n```",
|
117 |
-
"metadata": {
|
118 |
-
"parent_id": f"tool_call_{tool_name}",
|
119 |
-
"id": f"params_{tool_name}",
|
120 |
-
"title": "Tool Parameters"
|
121 |
-
}
|
122 |
-
})
|
123 |
-
|
124 |
-
result = await self.session.call_tool(tool_name, tool_args)
|
125 |
-
|
126 |
-
if result_messages and "metadata" in result_messages[-2]:
|
127 |
-
result_messages[-2]["metadata"]["status"] = "done"
|
128 |
-
|
129 |
-
result_messages.append({
|
130 |
-
"role": "assistant",
|
131 |
-
"content": "Here are the results from the tool:",
|
132 |
-
"metadata": {
|
133 |
-
"title": f"Tool Result for {tool_name}",
|
134 |
-
"status": "done",
|
135 |
-
"id": f"result_{tool_name}"
|
136 |
-
}
|
137 |
-
})
|
138 |
-
|
139 |
-
result_content = result.content
|
140 |
-
if isinstance(result_content, list):
|
141 |
-
result_content = "\n".join(str(item) for item in result_content)
|
142 |
-
|
143 |
-
try:
|
144 |
-
result_json = json.loads(result_content)
|
145 |
-
if isinstance(result_json, dict) and "type" in result_json:
|
146 |
-
if result_json["type"] == "image" and "url" in result_json:
|
147 |
-
result_messages.append({
|
148 |
-
"role": "assistant",
|
149 |
-
"content": {"path": result_json["url"], "alt_text": result_json.get("message", "Generated image")},
|
150 |
-
"metadata": {
|
151 |
-
"parent_id": f"result_{tool_name}",
|
152 |
-
"id": f"image_{tool_name}",
|
153 |
-
"title": "Generated Image"
|
154 |
-
}
|
155 |
-
})
|
156 |
-
else:
|
157 |
-
result_messages.append({
|
158 |
-
"role": "assistant",
|
159 |
-
"content": "```\n" + result_content + "\n```",
|
160 |
-
"metadata": {
|
161 |
-
"parent_id": f"result_{tool_name}",
|
162 |
-
"id": f"raw_result_{tool_name}",
|
163 |
-
"title": "Raw Output"
|
164 |
-
}
|
165 |
-
})
|
166 |
-
except:
|
167 |
-
result_messages.append({
|
168 |
-
"role": "assistant",
|
169 |
-
"content": "```\n" + result_content + "\n```",
|
170 |
-
"metadata": {
|
171 |
-
"parent_id": f"result_{tool_name}",
|
172 |
-
"id": f"raw_result_{tool_name}",
|
173 |
-
"title": "Raw Output"
|
174 |
-
}
|
175 |
-
})
|
176 |
-
|
177 |
-
claude_messages.append({"role": "user", "content": f"Tool result for {tool_name}: {result_content}"})
|
178 |
-
next_response = self.anthropic.messages.create(
|
179 |
-
model="claude-3-5-sonnet-20241022",
|
180 |
-
max_tokens=1000,
|
181 |
-
messages=claude_messages,
|
182 |
-
)
|
183 |
-
|
184 |
-
if next_response.content and next_response.content[0].type == 'text':
|
185 |
-
result_messages.append({
|
186 |
-
"role": "assistant",
|
187 |
-
"content": next_response.content[0].text
|
188 |
-
})
|
189 |
|
190 |
-
return result_messages
|
191 |
|
|
|
192 |
client = MCPClientWrapper()
|
193 |
|
|
|
194 |
def gradio_interface():
|
195 |
with gr.Blocks(title="MCP Weather Client") as demo:
|
196 |
gr.Markdown("# MCP Weather Assistant")
|
197 |
gr.Markdown("Connect to your MCP weather server and chat with the assistant")
|
198 |
-
|
199 |
with gr.Row(equal_height=True):
|
200 |
with gr.Column(scale=4):
|
201 |
server_path = gr.Textbox(
|
202 |
label="Server Script Path",
|
203 |
placeholder="Enter path to server script (e.g., weather.py)",
|
204 |
-
value="gradio_mcp_server.py"
|
205 |
)
|
206 |
with gr.Column(scale=1):
|
207 |
connect_btn = gr.Button("Connect")
|
208 |
-
|
209 |
status = gr.Textbox(label="Connection Status", interactive=False)
|
210 |
-
|
211 |
chatbot = gr.Chatbot(
|
212 |
-
value=[],
|
213 |
height=500,
|
214 |
type="messages",
|
215 |
show_copy_button=True,
|
216 |
-
avatar_images=("π€", "π€")
|
217 |
)
|
218 |
-
|
219 |
with gr.Row(equal_height=True):
|
220 |
msg = gr.Textbox(
|
221 |
label="Your Question",
|
222 |
placeholder="Ask about weather or alerts (e.g., What's the weather in New York?)",
|
223 |
-
scale=4
|
224 |
)
|
225 |
clear_btn = gr.Button("Clear Chat", scale=1)
|
226 |
-
|
227 |
connect_btn.click(client.connect, inputs=server_path, outputs=status)
|
228 |
msg.submit(client.process_message, [msg, chatbot], [chatbot, msg])
|
229 |
clear_btn.click(lambda: [], None, chatbot)
|
230 |
-
|
231 |
return demo
|
232 |
|
|
|
|
|
233 |
if __name__ == "__main__":
|
234 |
-
if not os.getenv("
|
235 |
-
print(
|
236 |
-
|
|
|
|
|
|
|
237 |
interface = gradio_interface()
|
238 |
-
interface.launch(debug=True)
|
|
|
1 |
+
"""
|
2 |
+
app.py — Hugging Face Space
|
3 |
+
Swaps Anthropic for HF Serverless Inference (Qwen3-235B-A22B)
|
4 |
+
"""
|
5 |
+
|
6 |
import asyncio
|
7 |
import os
|
8 |
import json
|
|
|
13 |
from gradio.components.chatbot import ChatMessage
|
14 |
from mcp import ClientSession, StdioServerParameters
|
15 |
from mcp.client.stdio import stdio_client
|
|
|
16 |
from dotenv import load_dotenv
|
17 |
+
from huggingface_hub import InferenceClient  # NEW ✨
|
18 |
|
19 |
# Load settings from a local .env file, if present; the entry point below
# checks HUGGINGFACE_API_TOKEN after this has run.
load_dotenv()

# One dedicated event loop for the whole module. The synchronous wrapper
# methods of MCPClientWrapper drive their coroutines on it with
# loop.run_until_complete(...).
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
|
23 |
|
24 |
+
|
25 |
class MCPClientWrapper:
    """
    Wraps an MCP stdio client + a chat LLM (Qwen3-235B-A22B via HF Serverless).

    The MCP session is only used to list the server's tools on connect; chat
    turns are answered by the Hugging Face model as plain text and no tool
    calls are forwarded. The synchronous methods (`connect`,
    `process_message`) run their async counterparts on the module-level
    `loop`.
    """

    # Roles that are forwarded to the model; anything else in the chat
    # history is ignored when the prompt is built.
    _CHAT_ROLES = frozenset({"user", "assistant", "system"})

    def __init__(self):
        self.session = None  # active ClientSession, or None while disconnected
        self.exit_stack = None  # AsyncExitStack owning transport + session
        self.tools: List[Dict[str, Any]] = []  # tool descriptors from list_tools()

        # --- Hugging Face client -------------------------------------------
        self.hf_client = InferenceClient(
            model="Qwen/Qwen3-235B-A22B",
            token=os.getenv("HUGGINGFACE_API_TOKEN"),
        )
        # --------------------------------------------------------------------

    # ─────────────────────────── MCP CONNECTION ────────────────────────────
    def connect(self, server_path: str) -> str:
        """Blocking wrapper around `_connect`; returns its status string."""
        return loop.run_until_complete(self._connect(server_path))

    async def _connect(self, server_path: str) -> str:
        """
        Start the MCP server script as a subprocess and open a session to it.

        Args:
            server_path: Path to the server script. A path ending in ".py" is
                launched with "python", anything else with "node".

        Returns:
            A human-readable status line listing the tool names the server
            reported.
        """
        if self.exit_stack:
            await self.exit_stack.aclose()
            # The previous session is closed now; drop the handle so a failed
            # reconnect does not leave a dead session looking "connected".
            self.session = None

        self.exit_stack = AsyncExitStack()

        is_python = server_path.endswith(".py")
        command = "python" if is_python else "node"

        server_params = StdioServerParameters(
            command=command,
            args=[server_path],
            env={"PYTHONIOENCODING": "utf-8", "PYTHONUNBUFFERED": "1"},
        )

        stdio_transport = await self.exit_stack.enter_async_context(
            stdio_client(server_params)
        )
        self.stdio, self.write = stdio_transport

        self.session = await self.exit_stack.enter_async_context(
            ClientSession(self.stdio, self.write)
        )
        await self.session.initialize()

        response = await self.session.list_tools()
        self.tools = [
            {
                "name": tool.name,
                "description": tool.description,
                "input_schema": tool.inputSchema,
            }
            for tool in response.tools
        ]

        tool_names = [tool["name"] for tool in self.tools]
        return f"Connected to MCP server. Available tools: {', '.join(tool_names)}"

    # ──────────────────────────── CHAT HANDLER ─────────────────────────────
    def process_message(
        self, message: str, history: List[Union[Dict[str, Any], ChatMessage]]
    ) -> tuple:
        """
        Handle one submitted chat message.

        Args:
            message: The text the user just submitted.
            history: The chat history so far, as message dicts or ChatMessage
                objects.

        Returns:
            A 2-tuple `(updated_history, textbox)`: the history extended with
            the user message and the reply, and an emptied `gr.Textbox` that
            clears the input field.
        """
        if not self.session:
            return (
                history
                + [
                    {"role": "user", "content": message},
                    {
                        "role": "assistant",
                        "content": "Please connect to an MCP server first.",
                    },
                ],
                gr.Textbox(value=""),
            )

        new_messages = loop.run_until_complete(self._process_query(message, history))
        return (
            history + [{"role": "user", "content": message}] + new_messages,
            gr.Textbox(value=""),
        )

    # ────────────────────────── INTERNAL LLM CALL ──────────────────────────
    @staticmethod
    def _history_to_messages(
        history: List[Union[Dict[str, Any], ChatMessage]]
    ) -> List[Dict[str, str]]:
        """Convert chat history into OpenAI-style `{"role", "content"}` dicts."""
        messages: List[Dict[str, str]] = []
        for item in history:
            if isinstance(item, ChatMessage):
                role, content = item.role, item.content
            else:
                role, content = item.get("role"), item.get("content")

            # Only plain-text turns belong in a text prompt; entries with a
            # missing or non-string content would otherwise be interpolated
            # as their repr (e.g. "None").
            if role in MCPClientWrapper._CHAT_ROLES and isinstance(content, str):
                messages.append({"role": role, "content": content})
        return messages

    @staticmethod
    def _build_prompt(messages: List[Dict[str, str]]) -> str:
        """Serialise messages to Qwen chat markup, ending on an open assistant turn."""
        prompt_parts = [
            f"<|im_start|>{m['role']}\n{m['content']}<|im_end|>" for m in messages
        ]
        prompt_parts.append("<|im_start|>assistant")  # model will complete here
        return "\n".join(prompt_parts)

    async def _process_query(
        self, message: str, history: List[Union[Dict[str, Any], ChatMessage]]
    ):
        """
        Pushes the whole chat history to Qwen3-235B-A22B and returns its reply.
        Tool calls are *not* forwarded; only the generated text is returned.

        Returns:
            A one-element list holding the assistant message dict.
        """
        # 1. Build the message list and serialise it to a single prompt.
        messages = self._history_to_messages(history)
        messages.append({"role": "user", "content": message})
        prompt = self._build_prompt(messages)

        # 2. Call HF Serverless in a thread pool so the event loop is not
        #    blocked. This must be a plain function: run_in_executor invokes
        #    the callable in a worker thread and hands back its return value,
        #    so an `async def` here would yield an un-awaited coroutine
        #    instead of the generated text.
        def _generate() -> str:
            return self.hf_client.text_generation(
                prompt,
                max_new_tokens=1024,
                temperature=0.7,
                stop_sequences=["<|im_end|>", "<|im_start|>"],
            )

        assistant_text: str = await asyncio.get_running_loop().run_in_executor(
            None, _generate
        )

        # 3. Return in Gradio-friendly format.
        return [{"role": "assistant", "content": assistant_text.strip()}]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
|
|
|
151 |
|
152 |
+
# ──────────────────────────── GRADIO UI ───────────────────────────────────
|
153 |
# Single shared wrapper instance; the UI callbacks wired up in
# gradio_interface() are bound methods of this object.
client = MCPClientWrapper()
|
154 |
|
155 |
+
|
156 |
def gradio_interface():
    """Build the Blocks UI and wire its events to the shared `client`.

    Layout, top to bottom: server-path row with a Connect button, a read-only
    status box, the chat panel, and a question row with a Clear button.

    Returns:
        The assembled `gr.Blocks` app (not yet launched).
    """

    def _empty_history():
        # Value pushed into the chat panel when "Clear Chat" is pressed.
        return []

    chat_options = {
        "value": [],
        "height": 500,
        "type": "messages",
        "show_copy_button": True,
        "avatar_images": ("π€", "π€"),
    }

    with gr.Blocks(title="MCP Weather Client") as demo:
        gr.Markdown("# MCP Weather Assistant")
        gr.Markdown("Connect to your MCP weather server and chat with the assistant")

        # Connection controls.
        with gr.Row(equal_height=True):
            with gr.Column(scale=4):
                path_box = gr.Textbox(
                    label="Server Script Path",
                    placeholder="Enter path to server script (e.g., weather.py)",
                    value="gradio_mcp_server.py",
                )
            with gr.Column(scale=1):
                connect_button = gr.Button("Connect")

        status_box = gr.Textbox(label="Connection Status", interactive=False)

        chat_panel = gr.Chatbot(**chat_options)

        # Question input and reset.
        with gr.Row(equal_height=True):
            question_box = gr.Textbox(
                label="Your Question",
                placeholder="Ask about weather or alerts (e.g., What's the weather in New York?)",
                scale=4,
            )
            clear_button = gr.Button("Clear Chat", scale=1)

        # Event wiring must happen inside the Blocks context.
        connect_button.click(client.connect, inputs=path_box, outputs=status_box)
        question_box.submit(
            client.process_message,
            [question_box, chat_panel],
            [chat_panel, question_box],
        )
        clear_button.click(_empty_history, None, chat_panel)

    return demo
|
194 |
|
195 |
+
|
196 |
+
# ──────────────────────────── ENTRY POINT ────────────────────────────────
|
197 |
if __name__ == "__main__":
    # A missing token is not fatal at startup: a warning is printed and the
    # UI is launched regardless.
    if os.getenv("HUGGINGFACE_API_TOKEN") in (None, ""):
        missing_token_warning = (
            "Warning: HUGGINGFACE_API_TOKEN not found in environment. "
            "Set it in your .env file or Space secrets."
        )
        print(missing_token_warning)

    interface = gradio_interface()
    interface.launch(debug=True)
|