Nekochu committed on
Commit
9c293be
·
1 Parent(s): 92f884a

fix: parse multipart/mixed response from /understand (was expecting JSON, got multipart)

Browse files
Files changed (1) hide show
  1. app.py +38 -28
app.py CHANGED
@@ -151,36 +151,46 @@ def _caption_via_understand(audio_path, timeout=600, cancel_check=None):
151
  logger.warning("[Caption] %s: /understand -> %s (%.0fs)", fname, status, elapsed)
152
  return None
153
 
154
- # Result may be in the poll response itself or need separate fetch
155
- result = None
156
-
157
- # Check if poll response contains the result directly
158
- if poll_data and isinstance(poll_data, dict):
159
- if poll_data.get("caption"):
160
- result = poll_data
161
- elif poll_data.get("result") and isinstance(poll_data["result"], dict):
162
- result = poll_data["result"]
163
-
164
- # Fallback: try fetching result separately
165
- if not result:
166
- try:
167
- r = _fetch_result(job_id, timeout=120)
168
- if r.status_code == 200 and r.text.strip():
169
- data = r.json()
170
- if isinstance(data, dict):
171
- if data.get("caption"):
172
- result = data
173
- elif data.get("result") and isinstance(data["result"], dict):
174
- result = data["result"]
175
- except Exception as exc:
176
- logger.warning("[Caption] %s: result fetch failed: %s", fname, exc)
 
 
 
 
 
 
 
 
 
 
 
 
177
 
178
- if result:
179
- logger.info("[Caption] %s: got caption (%d chars)", fname, len(result.get("caption", "")))
180
- return result
181
 
182
- logger.warning("[Caption] %s: no caption in response. poll_keys=%s",
183
- fname, list(poll_data.keys()) if isinstance(poll_data, dict) else "N/A")
184
  return None
185
 
186
 
 
151
  logger.warning("[Caption] %s: /understand -> %s (%.0fs)", fname, status, elapsed)
152
  return None
153
 
154
+ # Fetch result: /understand returns multipart/mixed (JSON + latents)
155
+ try:
156
+ r = _fetch_result(job_id, timeout=120)
157
+ if r.status_code != 200:
158
+ logger.warning("[Caption] %s: result fetch HTTP %d", fname, r.status_code)
159
+ return None
160
+
161
+ content_type = r.headers.get("Content-Type", "")
162
+
163
+ # multipart/mixed: extract JSON part (caption metadata)
164
+ if "multipart" in content_type:
165
+ boundary = None
166
+ for part in content_type.split(";"):
167
+ part = part.strip()
168
+ if part.startswith("boundary="):
169
+ boundary = part.split("=", 1)[1].strip('"')
170
+ if boundary:
171
+ import re
172
+ parts = r.content.split(f"--{boundary}".encode())
173
+ for part in parts:
174
+ if b"application/json" in part:
175
+ json_start = part.find(b"{")
176
+ json_end = part.rfind(b"}") + 1
177
+ if json_start >= 0 and json_end > json_start:
178
+ data = json.loads(part[json_start:json_end])
179
+ if isinstance(data, dict) and data.get("caption"):
180
+ logger.info("[Caption] %s: got caption (%d chars)",
181
+ fname, len(data["caption"]))
182
+ return data
183
+
184
+ # Plain JSON fallback
185
+ if r.text.strip():
186
+ data = r.json()
187
+ if isinstance(data, dict) and data.get("caption"):
188
+ return data
189
 
190
+ except Exception as exc:
191
+ logger.warning("[Caption] %s: result parse failed: %s", fname, exc)
 
192
 
193
+ logger.warning("[Caption] %s: no caption extracted from result", fname)
 
194
  return None
195
 
196