Peter Larnholt committed on
Commit
0e074c5
·
1 Parent(s): 2e9c870

Enable verbose logging to diagnose chat completion errors

Browse files

- Add error logging in chat_fn to see actual vLLM error responses
- Remove --disable-log-requests flag to see vLLM request logs
- This will help identify the root cause of 500 errors

Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -26,7 +26,6 @@ VLLM_ARGS = [
26
  "--max-model-len", "8192", # fits A10G 24GB
27
  "--gpu-memory-utilization", "0.90",
28
  "--trust-remote-code",
29
- "--disable-log-requests", # reduce log noise
30
  ]
31
  if "AWQ" in MODEL_ID.upper():
32
  VLLM_ARGS += ["--quantization", "awq_marlin"] # faster AWQ kernel if available
@@ -90,7 +89,9 @@ def chat_fn(user_message: str, history: list[dict]):
90
  messages = [{"role":"system","content":SYSTEM_PROMPT}] + history + [{"role":"user","content":user_message}]
91
  payload = {"model":"excom-ai","messages":messages,"temperature":0.4}
92
  r = requests.post(f"http://127.0.0.1:{API_PORT}/v1/chat/completions", json=payload, timeout=600)
93
- r.raise_for_status()
 
 
94
  return r.json()["choices"][0]["message"]["content"]
95
 
96
  ui = gr.ChatInterface(fn=chat_fn, title="ExCom AI β€” Qwen 2.5 14B AWQ (vLLM)", type="messages")
 
26
  "--max-model-len", "8192", # fits A10G 24GB
27
  "--gpu-memory-utilization", "0.90",
28
  "--trust-remote-code",
 
29
  ]
30
  if "AWQ" in MODEL_ID.upper():
31
  VLLM_ARGS += ["--quantization", "awq_marlin"] # faster AWQ kernel if available
 
89
  messages = [{"role":"system","content":SYSTEM_PROMPT}] + history + [{"role":"user","content":user_message}]
90
  payload = {"model":"excom-ai","messages":messages,"temperature":0.4}
91
  r = requests.post(f"http://127.0.0.1:{API_PORT}/v1/chat/completions", json=payload, timeout=600)
92
+ if not r.ok:
93
+ print(f"[ERROR] vLLM returned {r.status_code}: {r.text}")
94
+ return f"❌ Error: {r.status_code} - Check logs for details"
95
  return r.json()["choices"][0]["message"]["content"]
96
 
97
  ui = gr.ChatInterface(fn=chat_fn, title="ExCom AI β€” Qwen 2.5 14B AWQ (vLLM)", type="messages")