OppaAI committed on
Commit d722b23 · verified · 1 Parent(s): 0cc0fd6

Update app.py
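Registers a fastmcp tool server with a say_hi text-to-speech tool (gTTS + playsound), trims the VLM system prompt down to the single say_hi action, calls the tool when the model's parsed JSON requests it, and adds the tool result to the reply payload.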

Files changed (1): app.py (+35 −28)
app.py CHANGED
@@ -3,14 +3,32 @@ import base64
 import gradio as gr
 from huggingface_hub import upload_file, InferenceClient
 import json
+from fastmcp import FastMCP
+from playsound import playsound
+from gtts import gTTS
 
 # --- Config ---
 HF_DATASET_REPO = "OppaAI/Robot_MCP"
 HF_VLM_MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"
 
+# --- MCP server instance ---
+mcp = FastMCP("Robot_MCP")  # used to define tools
+
+# --- MCP Tool ---
+@mcp.tool()
+def say_hi(text="Hi!"):
+    # 1️⃣ Generate the mp3
+    tts = gTTS(text=text, lang="en")
+    tmp_path = "/tmp/say_hi.mp3"
+    tts.save(tmp_path)
+
+    # 2️⃣ Play the audio file
+    playsound(tmp_path)
+
+    return f"Played: {text}"
+
 # --- Helper Functions ---
 def save_and_upload_image(image_b64, hf_token):
-    """Save image to /tmp and upload to HF dataset."""
     image_bytes = base64.b64decode(image_b64)
     local_tmp_path = "/tmp/tmp.jpg"
     with open(local_tmp_path, "wb") as f:
@@ -28,7 +46,6 @@ def save_and_upload_image(image_b64, hf_token):
     hf_image_url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}/resolve/main/{path_in_repo}"
     return local_tmp_path, hf_image_url, path_in_repo, len(image_bytes)
 
-
 # --- Main MCP function ---
 def process_and_describe(payload: dict):
     try:
@@ -41,23 +58,16 @@ def process_and_describe(payload: dict):
         if not image_b64:
             return {"error": "No image provided."}
 
-        # Save & upload
         local_tmp_path, hf_url, path_in_repo, size_bytes = save_and_upload_image(image_b64, hf_token)
-
-        # Init HF client
         hf_client = InferenceClient(token=hf_token)
 
-        # System prompt: describe + suggest action
         system_prompt = """
         You are a helpful robot assistant.
-        1. Describe the image in detail, especially humans in full detail.
-        2. Suggest what the robot should do next based on what it sees:
+        1. Describe the image in detail.
+        2. Suggest what the robot should do next.
         - Human figure → say 'Hi'.
-        - Ball → move towards it.
-        - Obstacles → stop or avoid.
-        - Animal → identify the animal and take photos
         Always respond in JSON:
-        {"description": "...", "action": {"move": "...", "interact": "..."}}
+        {"description": "...", "action": "say_hi"}
         """
 
         messages_payload = [
@@ -68,27 +78,24 @@ def process_and_describe(payload: dict):
             ]}
         ]
 
-        # Call VLM
         chat_completion = hf_client.chat.completions.create(
             model=HF_VLM_MODEL,
             messages=messages_payload,
-            max_tokens=300
+            max_tokens=200
         )
 
-        # Robustly extract text
-        try:
-            vlm_text = chat_completion.choices[0].message.content.strip()
-        except Exception:
-            # fallback if structure is different
-            vlm_text = str(chat_completion)
-
-        # Attempt to parse JSON from VLM
+        vlm_text = chat_completion.choices[0].message.content.strip()
         action_data = {}
         try:
             action_data = json.loads(vlm_text)
         except Exception:
-            # If VLM didn't return valid JSON, wrap text as description
-            action_data = {"description": vlm_text, "action": {"move": "unknown", "interact": "unknown"}}
+            action_data = {"description": vlm_text, "action": "unknown"}
+
+        # --- Call MCP tool ---
+        vlm_action = action_data.get("action")
+        tool_result = None
+        if vlm_action == "say_hi":
+            tool_result = say_hi(text="Hi!")  # this generates /tmp/say_hi.mp3
 
         return {
             "saved_to_hf_hub": True,
@@ -98,18 +105,18 @@ def process_and_describe(payload: dict):
             "file_size_bytes": size_bytes,
             "robot_id": robot_id,
             "vlm_response": vlm_text,
-            "vlm_action": action_data.get("action", {}),
-            "vlm_description": action_data.get("description", "")
+            "vlm_action": vlm_action,
+            "vlm_description": action_data.get("description", ""),
+            "tool_result": tool_result
         }
 
     except Exception as e:
         return {"error": f"An API error occurred: {str(e)}"}
 
-
 # --- Gradio MCP Interface ---
 demo = gr.Interface(
     fn=process_and_describe,
-    inputs=gr.JSON(label="Input Payload (Dict format with 'image_b64')"),
+    inputs=gr.JSON(label="Input Payload"),
     outputs=gr.JSON(label="Reply to Jetson"),
     api_name="predict"
 )
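For context, a minimal client-side sketch of how the Jetson caller might invoke this endpoint. This is an assumption, not part of the commit: the Space id is guessed to mirror HF_DATASET_REPO, and the frame path, token, and robot_id values are placeholders (the handler appears to read image_b64, hf_token, and robot_id from the payload).

```python
import base64
from gradio_client import Client  # pip install gradio_client

# Space id is assumed to match the HF_DATASET_REPO name from the diff.
client = Client("OppaAI/Robot_MCP")

# Encode a captured frame as base64, as the handler expects.
with open("frame.jpg", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

payload = {
    "image_b64": image_b64,   # required; the handler errors without it
    "hf_token": "hf_...",     # needs write access to the dataset repo
    "robot_id": "jetson-01",  # echoed back in the reply
}

# api_name matches the Interface's api_name="predict".
result = client.predict(payload, api_name="/predict")
print(result)
```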
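One sharp edge in the new version: the robust text-extraction fallback was removed, and json.loads(vlm_text) raises whenever the VLM wraps its JSON in a Markdown code fence, so every such reply silently falls back to the "unknown" action. A small defensive parser could sit in front of json.loads; this is a sketch of one possible approach (the helper name is hypothetical, not part of the commit):

```python
import json
import re

def parse_vlm_json(vlm_text: str) -> dict:
    """Parse the VLM reply, tolerating a Markdown code fence around the JSON."""
    # Strip an optional ```json ... ``` wrapper before parsing.
    cleaned = re.sub(r"^```(?:json)?\s*|\s*```$", "", vlm_text.strip())
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        # Same fallback shape the commit uses when parsing fails.
        return {"description": vlm_text, "action": "unknown"}
```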