OppaAI committed on
Commit d722b23 · verified · 1 Parent(s): 0cc0fd6

Update app.py
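Registers a fastmcp tool server with a say_hi text-to-speech tool (gTTS + playsound), trims the VLM system prompt down to the single say_hi action, calls the tool when the model's parsed JSON requests it, and adds the tool result to the reply payload.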

Files changed (1): app.py (+35 −28)
app.py CHANGED
@@ -3,14 +3,32 @@ import base64
 import gradio as gr
 from huggingface_hub import upload_file, InferenceClient
 import json
+from fastmcp import FastMCP
+from playsound import playsound
+from gtts import gTTS
 
 # --- Config ---
 HF_DATASET_REPO = "OppaAI/Robot_MCP"
 HF_VLM_MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"
 
+# --- MCP server instance ---
+mcp = FastMCP("Robot_MCP")  # used to define tools
+
+# --- MCP Tool ---
+@mcp.tool()
+def say_hi(text="Hi!"):
+    # 1️⃣ Generate the mp3
+    tts = gTTS(text=text, lang="en")
+    tmp_path = "/tmp/say_hi.mp3"
+    tts.save(tmp_path)
+
+    # 2️⃣ Play the audio file
+    playsound(tmp_path)
+
+    return f"Played: {text}"
+
 # --- Helper Functions ---
 def save_and_upload_image(image_b64, hf_token):
-    """Save image to /tmp and upload to HF dataset."""
     image_bytes = base64.b64decode(image_b64)
     local_tmp_path = "/tmp/tmp.jpg"
     with open(local_tmp_path, "wb") as f:
@@ -28,7 +46,6 @@ def save_and_upload_image(image_b64, hf_token):
     hf_image_url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/{path_in_repo}"
     return local_tmp_path, hf_image_url, path_in_repo, len(image_bytes)
 
-
 # --- Main MCP function ---
 def process_and_describe(payload: dict):
     try:
@@ -41,23 +58,16 @@ def process_and_describe(payload: dict):
         if not image_b64:
             return {"error": "No image provided."}
 
-        # Save & upload
         local_tmp_path, hf_url, path_in_repo, size_bytes = save_and_upload_image(image_b64, hf_token)
-
-        # Init HF client
         hf_client = InferenceClient(token=hf_token)
 
-        # System prompt: describe + suggest action
         system_prompt = """
         You are a helpful robot assistant.
-        1. Describe the image in detail, especially humans in full detail.
-        2. Suggest what the robot should do next based on what it sees:
+        1. Describe the image in detail.
+        2. Suggest what the robot should do next.
         - Human figure → say 'Hi'.
-        - Ball → move towards it.
-        - Obstacles → stop or avoid.
-        - Animal → identify the animal and take photos
         Always respond in JSON:
-        {"description": "...", "action": {"move": "...", "interact": "..."}}
+        {"description": "...", "action": "say_hi"}
         """
 
         messages_payload = [
@@ -68,27 +78,24 @@ def process_and_describe(payload: dict):
             ]}
         ]
 
-        # Call VLM
         chat_completion = hf_client.chat.completions.create(
             model=HF_VLM_MODEL,
             messages=messages_payload,
-            max_tokens=300
+            max_tokens=200
         )
 
-        # Robustly extract text
-        try:
-            vlm_text = chat_completion.choices[0].message.content.strip()
-        except Exception:
-            # fallback if structure is different
-            vlm_text = str(chat_completion)
-
-        # Attempt to parse JSON from VLM
+        vlm_text = chat_completion.choices[0].message.content.strip()
         action_data = {}
         try:
             action_data = json.loads(vlm_text)
         except Exception:
-            # If VLM didn't return valid JSON, wrap text as description
-            action_data = {"description": vlm_text, "action": {"move": "unknown", "interact": "unknown"}}
+            action_data = {"description": vlm_text, "action": "unknown"}
+
+        # --- Call MCP tool ---
+        vlm_action = action_data.get("action")
+        tool_result = None
+        if vlm_action == "say_hi":
+            tool_result = say_hi(text="Hi!")  # this generates /tmp/say_hi.mp3
 
         return {
             "saved_to_hf_hub": True,
@@ -98,18 +105,18 @@ def process_and_describe(payload: dict):
             "file_size_bytes": size_bytes,
             "robot_id": robot_id,
             "vlm_response": vlm_text,
-            "vlm_action": action_data.get("action", {}),
-            "vlm_description": action_data.get("description", "")
+            "vlm_action": vlm_action,
+            "vlm_description": action_data.get("description", ""),
+            "tool_result": tool_result
         }
 
     except Exception as e:
         return {"error": f"An API error occurred: {str(e)}"}
 
-
 # --- Gradio MCP Interface ---
 demo = gr.Interface(
     fn=process_and_describe,
-    inputs=gr.JSON(label="Input Payload (Dict format with 'image_b64')"),
+    inputs=gr.JSON(label="Input Payload"),
     outputs=gr.JSON(label="Reply to Jetson"),
     api_name="predict"
 )
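For context, a minimal client-side sketch of how the Jetson caller might invoke this endpoint. This is an assumption, not part of the commit: the Space id is guessed to mirror HF_DATASET_REPO, and the frame path, token, and robot_id values are placeholders (the handler appears to read image_b64, hf_token, and robot_id from the payload).

```python
import base64
from gradio_client import Client  # pip install gradio_client

# Space id is assumed to match the HF_DATASET_REPO name from the diff.
client = Client("OppaAI/Robot_MCP")

# Encode a captured frame as base64, as the handler expects.
with open("frame.jpg", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

payload = {
    "image_b64": image_b64,   # required; the handler errors without it
    "hf_token": "hf_...",     # needs write access to the dataset repo
    "robot_id": "jetson-01",  # echoed back in the reply
}

# api_name matches the Interface's api_name="predict".
result = client.predict(payload, api_name="/predict")
print(result)
```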
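One sharp edge in the new version: the robust text-extraction fallback was removed, and json.loads(vlm_text) raises whenever the VLM wraps its JSON in a Markdown code fence, so every such reply silently falls back to the "unknown" action. A small defensive parser could sit in front of json.loads; this is a sketch of one possible approach (the helper name is hypothetical, not part of the commit):

```python
import json
import re

def parse_vlm_json(vlm_text: str) -> dict:
    """Parse the VLM reply, tolerating a Markdown code fence around the JSON."""
    # Strip an optional ```json ... ``` wrapper before parsing.
    cleaned = re.sub(r"^```(?:json)?\s*|\s*```$", "", vlm_text.strip())
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        # Same fallback shape the commit uses when parsing fails.
        return {"description": vlm_text, "action": "unknown"}
```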