Peter Larnholt committed on
Commit
0e074c5
·
1 Parent(s): 2e9c870

Enable verbose logging to diagnose chat completion errors

Browse files

- Add error logging in chat_fn to see actual vLLM error responses
- Remove --disable-log-requests flag to see vLLM request logs
- This will help identify the root cause of 500 errors

Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -26,7 +26,6 @@ VLLM_ARGS = [
26
  "--max-model-len", "8192", # fits A10G 24GB
27
  "--gpu-memory-utilization", "0.90",
28
  "--trust-remote-code",
29
- "--disable-log-requests", # reduce log noise
30
  ]
31
  if "AWQ" in MODEL_ID.upper():
32
  VLLM_ARGS += ["--quantization", "awq_marlin"] # faster AWQ kernel if available
@@ -90,7 +89,9 @@ def chat_fn(user_message: str, history: list[dict]):
90
  messages = [{"role":"system","content":SYSTEM_PROMPT}] + history + [{"role":"user","content":user_message}]
91
  payload = {"model":"excom-ai","messages":messages,"temperature":0.4}
92
  r = requests.post(f"http://127.0.0.1:{API_PORT}/v1/chat/completions", json=payload, timeout=600)
93
- r.raise_for_status()
 
 
94
  return r.json()["choices"][0]["message"]["content"]
95
 
96
  ui = gr.ChatInterface(fn=chat_fn, title="ExCom AI β€” Qwen 2.5 14B AWQ (vLLM)", type="messages")
 
26
  "--max-model-len", "8192", # fits A10G 24GB
27
  "--gpu-memory-utilization", "0.90",
28
  "--trust-remote-code",
 
29
  ]
30
  if "AWQ" in MODEL_ID.upper():
31
  VLLM_ARGS += ["--quantization", "awq_marlin"] # faster AWQ kernel if available
 
89
  messages = [{"role":"system","content":SYSTEM_PROMPT}] + history + [{"role":"user","content":user_message}]
90
  payload = {"model":"excom-ai","messages":messages,"temperature":0.4}
91
  r = requests.post(f"http://127.0.0.1:{API_PORT}/v1/chat/completions", json=payload, timeout=600)
92
+ if not r.ok:
93
+ print(f"[ERROR] vLLM returned {r.status_code}: {r.text}")
94
+ return f"❌ Error: {r.status_code} - Check logs for details"
95
  return r.json()["choices"][0]["message"]["content"]
96
 
97
  ui = gr.ChatInterface(fn=chat_fn, title="ExCom AI β€” Qwen 2.5 14B AWQ (vLLM)", type="messages")