Nymbo committed · Commit 956ea73 · verified · 1 Parent(s): e077167

swapping Claude for Qwen-30B-A3B on HF inference

Files changed (1):
  1. app.py +128 -161
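
At its core, the change swaps one SDK call for another: `anthropic.messages.create(...)` is replaced by `huggingface_hub`'s `InferenceClient.text_generation(...)`. A minimal sketch of the new call path, using the model and token name from the diff below (the prompt string is shortened for illustration):

    import os
    from huggingface_hub import InferenceClient

    # Same client setup as the new __init__ in the diff
    client = InferenceClient(
        model="Qwen/Qwen3-235B-A22B",
        token=os.getenv("HUGGINGFACE_API_TOKEN"),
    )
    # Plain text completion over a ChatML-formatted prompt
    reply = client.text_generation(
        "<|im_start|>user\nHello!<|im_end|>\n<|im_start|>assistant",
        max_new_tokens=64,
        stop_sequences=["<|im_end|>", "<|im_start|>"],
    )
    print(reply.strip())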
app.py CHANGED
@@ -1,3 +1,8 @@
  import asyncio
  import os
  import json
@@ -8,231 +13,193 @@ import gradio as gr
  from gradio.components.chatbot import ChatMessage
  from mcp import ClientSession, StdioServerParameters
  from mcp.client.stdio import stdio_client
- from anthropic import Anthropic
  from dotenv import load_dotenv

  load_dotenv()

  loop = asyncio.new_event_loop()
  asyncio.set_event_loop(loop)

  class MCPClientWrapper:
      def __init__(self):
          self.session = None
          self.exit_stack = None
-         self.anthropic = Anthropic()
-         self.tools = []
-
      def connect(self, server_path: str) -> str:
          return loop.run_until_complete(self._connect(server_path))
-
      async def _connect(self, server_path: str) -> str:
          if self.exit_stack:
              await self.exit_stack.aclose()
-
          self.exit_stack = AsyncExitStack()
-
-         is_python = server_path.endswith('.py')
          command = "python" if is_python else "node"
-
          server_params = StdioServerParameters(
              command=command,
              args=[server_path],
-             env={"PYTHONIOENCODING": "utf-8", "PYTHONUNBUFFERED": "1"}
          )
-
-         stdio_transport = await self.exit_stack.enter_async_context(stdio_client(server_params))
          self.stdio, self.write = stdio_transport
-
-         self.session = await self.exit_stack.enter_async_context(ClientSession(self.stdio, self.write))
          await self.session.initialize()
-
          response = await self.session.list_tools()
-         self.tools = [{
-             "name": tool.name,
-             "description": tool.description,
-             "input_schema": tool.inputSchema
-         } for tool in response.tools]
-
          tool_names = [tool["name"] for tool in self.tools]
          return f"Connected to MCP server. Available tools: {', '.join(tool_names)}"
-
-     def process_message(self, message: str, history: List[Union[Dict[str, Any], ChatMessage]]) -> tuple:
          if not self.session:
-             return history + [
-                 {"role": "user", "content": message},
-                 {"role": "assistant", "content": "Please connect to an MCP server first."}
-             ], gr.Textbox(value="")
-
          new_messages = loop.run_until_complete(self._process_query(message, history))
-         return history + [{"role": "user", "content": message}] + new_messages, gr.Textbox(value="")
-
-     async def _process_query(self, message: str, history: List[Union[Dict[str, Any], ChatMessage]]):
-         claude_messages = []
-         for msg in history:
-             if isinstance(msg, ChatMessage):
-                 role, content = msg.role, msg.content
              else:
-                 role, content = msg.get("role"), msg.get("content")
-
-             if role in ["user", "assistant", "system"]:
-                 claude_messages.append({"role": role, "content": content})
-
-         claude_messages.append({"role": "user", "content": message})
-
-         response = self.anthropic.messages.create(
-             model="claude-3-5-sonnet-20241022",
-             max_tokens=1000,
-             messages=claude_messages,
-             tools=self.tools
          )

-         result_messages = []
-
-         for content in response.content:
-             if content.type == 'text':
-                 result_messages.append({
-                     "role": "assistant",
-                     "content": content.text
-                 })
-
-             elif content.type == 'tool_use':
-                 tool_name = content.name
-                 tool_args = content.input
-
-                 result_messages.append({
-                     "role": "assistant",
-                     "content": f"I'll use the {tool_name} tool to help answer your question.",
-                     "metadata": {
-                         "title": f"Using tool: {tool_name}",
-                         "log": f"Parameters: {json.dumps(tool_args, ensure_ascii=True)}",
-                         "status": "pending",
-                         "id": f"tool_call_{tool_name}"
-                     }
-                 })
-
-                 result_messages.append({
-                     "role": "assistant",
-                     "content": "```json\n" + json.dumps(tool_args, indent=2, ensure_ascii=True) + "\n```",
-                     "metadata": {
-                         "parent_id": f"tool_call_{tool_name}",
-                         "id": f"params_{tool_name}",
-                         "title": "Tool Parameters"
-                     }
-                 })
-
-                 result = await self.session.call_tool(tool_name, tool_args)
-
-                 if result_messages and "metadata" in result_messages[-2]:
-                     result_messages[-2]["metadata"]["status"] = "done"
-
-                 result_messages.append({
-                     "role": "assistant",
-                     "content": "Here are the results from the tool:",
-                     "metadata": {
-                         "title": f"Tool Result for {tool_name}",
-                         "status": "done",
-                         "id": f"result_{tool_name}"
-                     }
-                 })
-
-                 result_content = result.content
-                 if isinstance(result_content, list):
-                     result_content = "\n".join(str(item) for item in result_content)
-
-                 try:
-                     result_json = json.loads(result_content)
-                     if isinstance(result_json, dict) and "type" in result_json:
-                         if result_json["type"] == "image" and "url" in result_json:
-                             result_messages.append({
-                                 "role": "assistant",
-                                 "content": {"path": result_json["url"], "alt_text": result_json.get("message", "Generated image")},
-                                 "metadata": {
-                                     "parent_id": f"result_{tool_name}",
-                                     "id": f"image_{tool_name}",
-                                     "title": "Generated Image"
-                                 }
-                             })
-                         else:
-                             result_messages.append({
-                                 "role": "assistant",
-                                 "content": "```\n" + result_content + "\n```",
-                                 "metadata": {
-                                     "parent_id": f"result_{tool_name}",
-                                     "id": f"raw_result_{tool_name}",
-                                     "title": "Raw Output"
-                                 }
-                             })
-                 except:
-                     result_messages.append({
-                         "role": "assistant",
-                         "content": "```\n" + result_content + "\n```",
-                         "metadata": {
-                             "parent_id": f"result_{tool_name}",
-                             "id": f"raw_result_{tool_name}",
-                             "title": "Raw Output"
-                         }
-                     })
-
-                 claude_messages.append({"role": "user", "content": f"Tool result for {tool_name}: {result_content}"})
-                 next_response = self.anthropic.messages.create(
-                     model="claude-3-5-sonnet-20241022",
-                     max_tokens=1000,
-                     messages=claude_messages,
-                 )
-
-                 if next_response.content and next_response.content[0].type == 'text':
-                     result_messages.append({
-                         "role": "assistant",
-                         "content": next_response.content[0].text
-                     })

-         return result_messages

  client = MCPClientWrapper()

  def gradio_interface():
      with gr.Blocks(title="MCP Weather Client") as demo:
          gr.Markdown("# MCP Weather Assistant")
          gr.Markdown("Connect to your MCP weather server and chat with the assistant")
-
          with gr.Row(equal_height=True):
              with gr.Column(scale=4):
                  server_path = gr.Textbox(
                      label="Server Script Path",
                      placeholder="Enter path to server script (e.g., weather.py)",
-                     value="gradio_mcp_server.py"
                  )
              with gr.Column(scale=1):
                  connect_btn = gr.Button("Connect")
-
          status = gr.Textbox(label="Connection Status", interactive=False)
-
          chatbot = gr.Chatbot(
-             value=[],
              height=500,
              type="messages",
              show_copy_button=True,
-             avatar_images=("👤", "🤖")
          )
-
          with gr.Row(equal_height=True):
              msg = gr.Textbox(
                  label="Your Question",
                  placeholder="Ask about weather or alerts (e.g., What's the weather in New York?)",
-                 scale=4
              )
              clear_btn = gr.Button("Clear Chat", scale=1)
-
          connect_btn.click(client.connect, inputs=server_path, outputs=status)
          msg.submit(client.process_message, [msg, chatbot], [chatbot, msg])
          clear_btn.click(lambda: [], None, chatbot)
-
          return demo

  if __name__ == "__main__":
-     if not os.getenv("ANTHROPIC_API_KEY"):
-         print("Warning: ANTHROPIC_API_KEY not found in environment. Please set it in your .env file.")
-
      interface = gradio_interface()
-     interface.launch(debug=True)
 
@@ -1,3 +1,8 @@
+ """
+ app.py – Hugging Face Space
+ Swaps Anthropic for HF Serverless Inference (Qwen3-235B-A22B)
+ """
+
  import asyncio
  import os
  import json
@@ -8,231 +13,193 @@ import gradio as gr
  from gradio.components.chatbot import ChatMessage
  from mcp import ClientSession, StdioServerParameters
  from mcp.client.stdio import stdio_client
  from dotenv import load_dotenv
+ from huggingface_hub import InferenceClient  # NEW ✨

  load_dotenv()

  loop = asyncio.new_event_loop()
  asyncio.set_event_loop(loop)

+
  class MCPClientWrapper:
+     """
+     Wraps an MCP stdio client + a chat LLM (Qwen3-235B-A22B via HF Serverless).
+     """
+
      def __init__(self):
          self.session = None
          self.exit_stack = None
+         self.tools: List[Dict[str, Any]] = []
+
+         # --- NEW: Hugging Face client ---------------------------------------
+         self.hf_client = InferenceClient(
+             model="Qwen/Qwen3-235B-A22B",
+             token=os.getenv("HUGGINGFACE_API_TOKEN")
+         )
+         # --------------------------------------------------------------------
+
+     # ─────────────────────────── MCP CONNECTION ────────────────────────────
      def connect(self, server_path: str) -> str:
          return loop.run_until_complete(self._connect(server_path))
+
      async def _connect(self, server_path: str) -> str:
          if self.exit_stack:
              await self.exit_stack.aclose()
+
          self.exit_stack = AsyncExitStack()
+
+         is_python = server_path.endswith(".py")
          command = "python" if is_python else "node"
+
          server_params = StdioServerParameters(
              command=command,
              args=[server_path],
+             env={"PYTHONIOENCODING": "utf-8", "PYTHONUNBUFFERED": "1"},
+         )
+
+         stdio_transport = await self.exit_stack.enter_async_context(
+             stdio_client(server_params)
          )
          self.stdio, self.write = stdio_transport
+
+         self.session = await self.exit_stack.enter_async_context(
+             ClientSession(self.stdio, self.write)
+         )
          await self.session.initialize()
+
          response = await self.session.list_tools()
+         self.tools = [
+             {
+                 "name": tool.name,
+                 "description": tool.description,
+                 "input_schema": tool.inputSchema,
+             }
+             for tool in response.tools
+         ]
+
          tool_names = [tool["name"] for tool in self.tools]
          return f"Connected to MCP server. Available tools: {', '.join(tool_names)}"
+
+     # ──────────────────────────── CHAT HANDLER ─────────────────────────────
+     def process_message(
+         self, message: str, history: List[Union[Dict[str, Any], ChatMessage]]
+     ) -> tuple:
          if not self.session:
+             return (
+                 history
+                 + [
+                     {"role": "user", "content": message},
+                     {
+                         "role": "assistant",
+                         "content": "Please connect to an MCP server first.",
+                     },
+                 ],
+                 gr.Textbox(value=""),
+             )
+
          new_messages = loop.run_until_complete(self._process_query(message, history))
+         return (
+             history + [{"role": "user", "content": message}] + new_messages,
+             gr.Textbox(value=""),
+         )
+
+     # ────────────────────────── INTERNAL LLM CALL ─────────────────────────
+     async def _process_query(
+         self, message: str, history: List[Union[Dict[str, Any], ChatMessage]]
+     ):
+         """
+         Pushes the whole chat history to Qwen3-235B-A22B and returns its reply.
+         Tool calls are *not* forwarded – the HF endpoint only returns text.
+         """
+         # 1️⃣ Build message list in OpenAI-style dicts
+         messages: List[Dict[str, str]] = []
+         for item in history:
+             if isinstance(item, ChatMessage):
+                 role, content = item.role, item.content
              else:
+                 role, content = item.get("role"), item.get("content")
+
+             if role in {"user", "assistant", "system"}:
+                 messages.append({"role": role, "content": content})
+         messages.append({"role": "user", "content": message})
+
+         # 2️⃣ Serialise to Qwen chat-markup
+         prompt_parts = []
+         for m in messages:
+             role = m["role"]
+             prompt_parts.append(f"<|im_start|>{role}\n{m['content']}<|im_end|>")
+         prompt_parts.append("<|im_start|>assistant")  # model will complete here
+         prompt = "\n".join(prompt_parts)
+
+         # 3️⃣ Call HF Serverless in a threadpool (non-blocking)
+         def _generate():  # plain function: run_in_executor needs a sync callable
+             return self.hf_client.text_generation(
+                 prompt,
+                 max_new_tokens=1024,
+                 temperature=0.7,
+                 stop_sequences=["<|im_end|>", "<|im_start|>"],
+             )
+
+         assistant_text: str = await asyncio.get_running_loop().run_in_executor(
+             None, _generate
          )

+         # 4️⃣ Return in Gradio-friendly format
+         return [{"role": "assistant", "content": assistant_text.strip()}]

+ # ──────────────────────────── GRADIO UI ───────────────────────────────────
  client = MCPClientWrapper()

+
  def gradio_interface():
      with gr.Blocks(title="MCP Weather Client") as demo:
          gr.Markdown("# MCP Weather Assistant")
          gr.Markdown("Connect to your MCP weather server and chat with the assistant")
+
          with gr.Row(equal_height=True):
              with gr.Column(scale=4):
                  server_path = gr.Textbox(
                      label="Server Script Path",
                      placeholder="Enter path to server script (e.g., weather.py)",
+                     value="gradio_mcp_server.py",
                  )
              with gr.Column(scale=1):
                  connect_btn = gr.Button("Connect")
+
          status = gr.Textbox(label="Connection Status", interactive=False)
+
          chatbot = gr.Chatbot(
+             value=[],
              height=500,
              type="messages",
              show_copy_button=True,
+             avatar_images=("👤", "🤖"),
          )
+
          with gr.Row(equal_height=True):
              msg = gr.Textbox(
                  label="Your Question",
                  placeholder="Ask about weather or alerts (e.g., What's the weather in New York?)",
+                 scale=4,
              )
              clear_btn = gr.Button("Clear Chat", scale=1)
+
          connect_btn.click(client.connect, inputs=server_path, outputs=status)
          msg.submit(client.process_message, [msg, chatbot], [chatbot, msg])
          clear_btn.click(lambda: [], None, chatbot)
+
          return demo

+
+ # ──────────────────────────── ENTRY POINT ────────────────────────────────
  if __name__ == "__main__":
+     if not os.getenv("HUGGINGFACE_API_TOKEN"):
+         print(
+             "Warning: HUGGINGFACE_API_TOKEN not found in environment. "
+             "Set it in your .env file or Space secrets."
+         )
+
      interface = gradio_interface()
+     interface.launch(debug=True)
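
For reference, the prompt the new `_process_query` sends is plain ChatML text: the history is serialised into `<|im_start|>`/`<|im_end|>` blocks and the model completes from the trailing assistant header. A minimal, self-contained sketch of that serialisation step (the history values here are made up for illustration):

    # Sketch of the ChatML prompt _process_query builds; history is illustrative.
    history = [
        {"role": "system", "content": "You are a weather assistant."},
        {"role": "user", "content": "Any alerts in New York?"},
    ]
    prompt_parts = [
        f"<|im_start|>{m['role']}\n{m['content']}<|im_end|>" for m in history
    ]
    prompt_parts.append("<|im_start|>assistant")  # generation continues from here
    prompt = "\n".join(prompt_parts)
    print(prompt)
    # <|im_start|>system
    # You are a weather assistant.<|im_end|>
    # <|im_start|>user
    # Any alerts in New York?<|im_end|>
    # <|im_start|>assistant

Generation is stopped on `<|im_end|>`/`<|im_start|>`, so the stripped completion is what reaches the chatbot. Note that, unlike the Anthropic version, no tool schema accompanies the request: MCP tools are still listed at connect time but are never invoked by the model.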