Spaces:
Paused
Paused
Peter Larnholt
committed on
Commit
·
0e074c5
1
Parent(s):
2e9c870
Enable verbose logging to diagnose chat completion errors
Browse files
- Add error logging in chat_fn to see actual vLLM error responses
- Remove --disable-log-requests flag to see vLLM request logs
- This will help identify the root cause of 500 errors
app.py
CHANGED
|
@@ -26,7 +26,6 @@ VLLM_ARGS = [
|
|
| 26 |
"--max-model-len", "8192", # fits A10G 24GB
|
| 27 |
"--gpu-memory-utilization", "0.90",
|
| 28 |
"--trust-remote-code",
|
| 29 |
-
"--disable-log-requests", # reduce log noise
|
| 30 |
]
|
| 31 |
if "AWQ" in MODEL_ID.upper():
|
| 32 |
VLLM_ARGS += ["--quantization", "awq_marlin"] # faster AWQ kernel if available
|
|
@@ -90,7 +89,9 @@ def chat_fn(user_message: str, history: list[dict]):
|
|
| 90 |
messages = [{"role":"system","content":SYSTEM_PROMPT}] + history + [{"role":"user","content":user_message}]
|
| 91 |
payload = {"model":"excom-ai","messages":messages,"temperature":0.4}
|
| 92 |
r = requests.post(f"http://127.0.0.1:{API_PORT}/v1/chat/completions", json=payload, timeout=600)
|
| 93 |
-
r.
|
|
|
|
|
|
|
| 94 |
return r.json()["choices"][0]["message"]["content"]
|
| 95 |
|
| 96 |
ui = gr.ChatInterface(fn=chat_fn, title="ExCom AI — Qwen 2.5 14B AWQ (vLLM)", type="messages")
|
|
|
|
| 26 |
"--max-model-len", "8192", # fits A10G 24GB
|
| 27 |
"--gpu-memory-utilization", "0.90",
|
| 28 |
"--trust-remote-code",
|
|
|
|
| 29 |
]
|
| 30 |
if "AWQ" in MODEL_ID.upper():
|
| 31 |
VLLM_ARGS += ["--quantization", "awq_marlin"] # faster AWQ kernel if available
|
|
|
|
| 89 |
messages = [{"role":"system","content":SYSTEM_PROMPT}] + history + [{"role":"user","content":user_message}]
|
| 90 |
payload = {"model":"excom-ai","messages":messages,"temperature":0.4}
|
| 91 |
r = requests.post(f"http://127.0.0.1:{API_PORT}/v1/chat/completions", json=payload, timeout=600)
|
| 92 |
+
if not r.ok:
|
| 93 |
+
print(f"[ERROR] vLLM returned {r.status_code}: {r.text}")
|
| 94 |
+
return f"❌ Error: {r.status_code} - Check logs for details"
|
| 95 |
return r.json()["choices"][0]["message"]["content"]
|
| 96 |
|
| 97 |
ui = gr.ChatInterface(fn=chat_fn, title="ExCom AI — Qwen 2.5 14B AWQ (vLLM)", type="messages")
|