Spaces:

OppaAI
/

Robot_MCP_Server

Sleeping

App Files Community

OppaAI committed on Nov 23, 2025

Commit

bdb8def

verified ·

1 Parent(s): eaeee99

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -60

app.py CHANGED Viewed

@@ -3,26 +3,21 @@ import base64
 import json
 from datetime import datetime
 import traceback
-from typing import Optional, Dict, Any
 import gradio as gr
 from huggingface_hub import HfApi, InferenceClient
 from fastmcp import FastMCP
 HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "OppaAI/Robot_MCP")
 HF_VLM_MODEL = os.environ.get("HF_VLM_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")
-mcp = FastMCP("Robot_MCP_Server")
-# -------------------------------
-# Upload helper
-# -------------------------------
 def upload_image(image_b64: str, hf_token: str):
     try:
         image_bytes = base64.b64decode(image_b64)
-        size_bytes = len(image_bytes)
         os.makedirs("/tmp", exist_ok=True)
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
@@ -32,7 +27,6 @@ def upload_image(image_b64: str, hf_token: str):
             f.write(image_bytes)
         filename = f"robot_{timestamp}.jpg"
         api = HfApi()
         api.upload_file(
             path_or_fileobj=local_path,
@@ -43,16 +37,13 @@ def upload_image(image_b64: str, hf_token: str):
         )
         url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/tmp/{filename}"
-        return local_path, url, filename, size_bytes
     except Exception:
         traceback.print_exc()
         return None, None, None, 0
-# -------------------------------
-# Safe JSON parse
-# -------------------------------
 def safe_parse_json_from_text(text: str):
     if not text:
         return None
@@ -73,42 +64,34 @@ def safe_parse_json_from_text(text: str):
         return None
-# -------------------------------
-# TRUE CORE FUNCTION
-# -------------------------------
-# -------------------------------
-# TRUE CORE FUNCTION (with objects)
-# -------------------------------
-@mcp.tool(name="robot_watch")
-def robot_watch_core(payload: Dict[str, Any]):
-    if isinstance(payload, str):
-        try:
-            payload = json.loads(payload)
-        except:
-            return {"error": "Invalid JSON payload"}
     hf_token = payload.get("hf_token")
-    if not hf_token:
-        return {"error": "hf_token missing"}
-    robot_id = payload.get("robot_id", "unknown")
     image_b64 = payload.get("image_b64")
     if not image_b64:
-        return {"error": "image_b64 missing"}
-    # Upload
     _, hf_url, _, size_bytes = upload_image(image_b64, hf_token)
     if not hf_url:
         return {"error": "Image upload failed"}
-    # VLM
     system_prompt = """
-Respond in STRICT JSON ONLY.
 {
  "description": "...",
  "human": "...",
  "environment": "...",
- "objects": []  // list of detected objects
 }
 """
@@ -121,7 +104,6 @@ Respond in STRICT JSON ONLY.
     ]
     client = InferenceClient(token=hf_token)
     try:
         resp = client.chat.completions.create(
             model=HF_VLM_MODEL,
@@ -133,45 +115,30 @@ Respond in STRICT JSON ONLY.
         return {"status": "error", "message": str(e)}
     vlm_output = resp.choices[0].message.content.strip()
-    parsed = safe_parse_json_from_text(vlm_output)
-    if parsed is None:
-        return {
-            "status": "model_no_json",
-            "vlm_raw": vlm_output,
-            "message": "Invalid JSON returned"
-        }
-    # Ensure "objects" is a list
-    objects = parsed.get("objects", [])
-    if not isinstance(objects, list):
-        objects = []
     return {
         "status": "success",
         "robot_id": robot_id,
         "file_size_bytes": size_bytes,
         "image_url": hf_url,
-        "description": parsed.get("description"),
-        "human": parsed.get("human"),
-        "environment": parsed.get("environment"),
-        "objects": objects,   # ← new field
         "vlm_raw": vlm_output
     }
-# -------------------------------
-# Gradio wrapper
-# -------------------------------
 def process_json(payload):
-    return robot_watch_core(payload)
 app = gr.Interface(
     fn=process_json,
-    inputs=gr.JSON(label="Input JSON"),
-    outputs=gr.JSON(label="Result JSON"),
     title="Robot MCP Server",
-    description="JSON endpoint for robot vision pipeline.",
     api_name="predict"
 )

 import json
 from datetime import datetime
 import traceback
+from typing import Dict, Any
 import gradio as gr
 from huggingface_hub import HfApi, InferenceClient
 from fastmcp import FastMCP
 HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "OppaAI/Robot_MCP")
 HF_VLM_MODEL = os.environ.get("HF_VLM_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")
+mcp = FastMCP("Robot_MCP_Server")   # <-- Important
 def upload_image(image_b64: str, hf_token: str):
     try:
         image_bytes = base64.b64decode(image_b64)
         os.makedirs("/tmp", exist_ok=True)
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
             f.write(image_bytes)
         filename = f"robot_{timestamp}.jpg"
         api = HfApi()
         api.upload_file(
             path_or_fileobj=local_path,
         )
         url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/tmp/{filename}"
+        return local_path, url, filename, len(image_bytes)
     except Exception:
         traceback.print_exc()
         return None, None, None, 0
 def safe_parse_json_from_text(text: str):
     if not text:
         return None
         return None
+# ---------------------------------------------------
+#  TRUE MCP TOOL — THIS must be exposed to MCP client
+# ---------------------------------------------------
+@mcp.tool(
+    name="robot_watch",
+    description="Analyze a base64 image using Qwen VLM and return structured JSON."
+)
+def robot_watch(payload: Dict[str, Any]):
     hf_token = payload.get("hf_token")
     image_b64 = payload.get("image_b64")
+    robot_id = payload.get("robot_id", "unknown")
+    if not hf_token:
+        return {"error": "Missing hf_token"}
     if not image_b64:
+        return {"error": "Missing image_b64"}
     _, hf_url, _, size_bytes = upload_image(image_b64, hf_token)
     if not hf_url:
         return {"error": "Image upload failed"}
     system_prompt = """
+Respond in STRICT JSON ONLY:
 {
  "description": "...",
  "human": "...",
  "environment": "...",
+ "objects": []
 }
 """
     ]
     client = InferenceClient(token=hf_token)
     try:
         resp = client.chat.completions.create(
             model=HF_VLM_MODEL,
         return {"status": "error", "message": str(e)}
     vlm_output = resp.choices[0].message.content.strip()
+    parsed = safe_parse_json_from_text(vlm_output) or {}
     return {
         "status": "success",
         "robot_id": robot_id,
         "file_size_bytes": size_bytes,
         "image_url": hf_url,
+        "result": parsed,
         "vlm_raw": vlm_output
     }
+# ---------------------------------------------------
+# Gradio UI — separate from MCP tool layer
+# ---------------------------------------------------
 def process_json(payload):
+    return robot_watch(payload)
 app = gr.Interface(
     fn=process_json,
+    inputs=gr.JSON(),
+    outputs=gr.JSON(),
     title="Robot MCP Server",
     api_name="predict"
 )