OppaAI committed on
Commit
e6b6ea7
·
verified ·
1 Parent(s): 9230f22

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -21
app.py CHANGED
@@ -10,19 +10,16 @@ HF_DATASET_REPO = "OppaAI/Robot_MCP"
10
  HF_VLM_MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"
11
 
12
  # --- MCP server instance ---
13
- mcp = FastMCP("Robot MCP")
14
-
15
- # --- STIO for the LLM ---
16
- #stio = STIO(mcp) # Bind STIO to MCP tools
17
 
18
  # --- MCP Tool ---
19
  @mcp.tool()
20
- def say_hi(greeting_text: str = "Hi there!"):
21
  """Return a greeting command in JSON."""
22
  return {"command": "say_hi", "text": greeting_text}
23
 
24
  # --- Helper Functions ---
25
- def save_and_upload_image(image_b64, hf_token):
26
  image_bytes = base64.b64decode(image_b64)
27
  local_tmp_path = "/tmp/tmp.jpg"
28
  with open(local_tmp_path, "wb") as f:
@@ -58,17 +55,19 @@ def process_and_describe(payload: dict):
58
  # Initialize HF client
59
  hf_client = InferenceClient(token=hf_token)
60
 
61
- # --- System prompt with STIO instructions ---
62
- system_prompt = f"""
63
- You are a helpful robot assistant. You have access to MCP tools via STIO.
64
  When you receive an image, you must:
65
- 1️⃣ Describe the image in detail.
66
- 2️⃣ Decide actions for the robot. Example:
67
- - Human figure → call `say_hi` tool with a friendly greeting (vary every time)
68
- 3️⃣ Use STIO to call the tools. Always respond in JSON if calling tools.
69
-
70
- Available tools:
71
- {stio.describe_tools()}
 
 
72
  """
73
 
74
  messages_payload = [
@@ -79,7 +78,7 @@ def process_and_describe(payload: dict):
79
  ]}
80
  ]
81
 
82
- # --- Call VLM with STIO ---
83
  chat_completion = hf_client.chat.completions.create(
84
  model=HF_VLM_MODEL,
85
  messages=messages_payload,
@@ -88,8 +87,17 @@ def process_and_describe(payload: dict):
88
 
89
  vlm_text = chat_completion.choices[0].message.content.strip()
90
 
91
- # --- Use STIO to execute tool calls if present ---
92
- tool_results = stio.run(vlm_text)
 
 
 
 
 
 
 
 
 
93
 
94
  return {
95
  "saved_to_hf_hub": True,
@@ -99,7 +107,9 @@ def process_and_describe(payload: dict):
99
  "file_size_bytes": size_bytes,
100
  "robot_id": robot_id,
101
  "vlm_response": vlm_text,
102
- "tool_results": tool_results
 
 
103
  }
104
 
105
  except Exception as e:
@@ -113,6 +123,14 @@ demo = gr.Interface(
113
  api_name="predict"
114
  )
115
 
116
- #
117
  if __name__ == "__main__":
 
 
 
 
 
 
 
 
 
118
  demo.launch(mcp_server=True)
 
10
  HF_VLM_MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"
11
 
12
  # --- MCP server instance ---
13
+ mcp = FastMCP(name="Robot MCP")
 
 
 
14
 
15
  # --- MCP Tool ---
16
  @mcp.tool()
17
+ def say_hi(greeting_text: str = "Hi there!") -> dict:
18
  """Return a greeting command in JSON."""
19
  return {"command": "say_hi", "text": greeting_text}
20
 
21
  # --- Helper Functions ---
22
+ def save_and_upload_image(image_b64: str, hf_token: str):
23
  image_bytes = base64.b64decode(image_b64)
24
  local_tmp_path = "/tmp/tmp.jpg"
25
  with open(local_tmp_path, "wb") as f:
 
55
  # Initialize HF client
56
  hf_client = InferenceClient(token=hf_token)
57
 
58
+ # System prompt (without stio.describe_tools because not using STIO here)
59
+ system_prompt = """
60
+ You are a helpful robot assistant.
61
  When you receive an image, you must:
62
+ 1. Describe the image in detail.
63
+ 2. Decide actions for the robot. Example:
64
+ - Human figure → call the `say_hi` tool with a friendly greeting (vary every time)
65
+ Always respond in JSON with:
66
+ {
67
+ "description": "...",
68
+ "action": "say_hi",
69
+ "greeting_text": "a friendly greeting"
70
+ }
71
  """
72
 
73
  messages_payload = [
 
78
  ]}
79
  ]
80
 
81
+ # Call VLM
82
  chat_completion = hf_client.chat.completions.create(
83
  model=HF_VLM_MODEL,
84
  messages=messages_payload,
 
87
 
88
  vlm_text = chat_completion.choices[0].message.content.strip()
89
 
90
+ # Parse JSON from VLM
91
+ try:
92
+ action_data = json.loads(vlm_text)
93
+ except json.JSONDecodeError:
94
+ action_data = {"description": vlm_text, "action": None, "greeting_text": None}
95
+
96
+ # Call the tool if action == say_hi
97
+ tool_result = None
98
+ if action_data.get("action") == "say_hi":
99
+ greeting = action_data.get("greeting_text") or "Hi!"
100
+ tool_result = say_hi(greeting_text=greeting)
101
 
102
  return {
103
  "saved_to_hf_hub": True,
 
107
  "file_size_bytes": size_bytes,
108
  "robot_id": robot_id,
109
  "vlm_response": vlm_text,
110
+ "vlm_action": action_data.get("action"),
111
+ "vlm_description": action_data.get("description"),
112
+ "tool_result": tool_result
113
  }
114
 
115
  except Exception as e:
 
123
  api_name="predict"
124
  )
125
 
 
126
  if __name__ == "__main__":
127
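+ # Run the FastMCP stdio server in a background daemon thread of this process; mcp.run() blocks, so it must not run on the main thread before demo.launch()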
128
+ import threading
129
+
130
+ def run_mcp():
131
+ mcp.run(transport="stdio")
132
+
133
+ t = threading.Thread(target=run_mcp, daemon=True)
134
+ t.start()
135
+
136
  demo.launch(mcp_server=True)