Kai Izumoto committed
Update app.py
app.py CHANGED
@@ -149,15 +149,21 @@ def extract_chunk_content(chunk: Any) -> Optional[str]:
     try:
         if isinstance(chunk, dict) and (choices := chunk.get("choices")):
             delta = choices[0].get("delta", {})
-
+            # streaming chunk structure
+            return delta.get("content") or delta.get("text")
         elif hasattr(chunk, 'delta') and hasattr(chunk.delta, 'content'):
             return chunk.delta.content
+        # sometimes streaming yields strings directly
+        if isinstance(chunk, str):
+            return chunk
     except Exception:
         return None
     return None
 
 def call_model(client: InferenceClient, system: str, user: str, is_python: bool, **settings) -> str:
-    """Calls the appropriate LLM with retry logic and fallbacks.
+    """Calls the appropriate LLM with retry logic and fallbacks.
+    Tries non-streaming first (more reliable), falls back to streaming.
+    """
     primary_model = PYTHON_MODEL if is_python else OTHER_MODEL
     models_to_try = [primary_model] + FALLBACK_MODELS
 
@@ -165,6 +171,43 @@ def call_model(client: InferenceClient, system: str, user: str, is_python: bool,
 
     last_exception = None
     for model_name in models_to_try:
+        # First attempt: non-streaming call (more reliable across client versions/models)
+        try:
+            resp = client.chat_completion(messages, model=model_name, stream=False, **settings)
+            # resp can be dict-like or string; try multiple extraction methods
+            response_text = ""
+            try:
+                if isinstance(resp, dict):
+                    # Common HF shapes
+                    if "generated_text" in resp and isinstance(resp["generated_text"], str):
+                        response_text = resp["generated_text"]
+                    elif "text" in resp and isinstance(resp["text"], str):
+                        response_text = resp["text"]
+                    elif "choices" in resp and resp["choices"]:
+                        choice = resp["choices"][0]
+                        # choice may contain 'message' with 'content'
+                        if isinstance(choice, dict):
+                            if "message" in choice and isinstance(choice["message"], dict):
+                                response_text = choice["message"].get("content") or choice["message"].get("text", "") or ""
+                            else:
+                                response_text = choice.get("text") or choice.get("message") or ""
+                elif isinstance(resp, str):
+                    response_text = resp
+                else:
+                    # Fallback to string representation
+                    response_text = str(resp)
+            except Exception as e:
+                write_error_log(e, f"Non-stream parsing failed for model {model_name}")
+
+            if response_text and response_text.strip():
+                return response_text
+        except Exception as e:
+            # Save and try streaming fallback below
+            last_exception = e
+            write_error_log(e, f"Non-stream model {model_name} failed, attempting stream fallback")
+            # fall through to streaming attempt
+
+        # Streaming fallback (older code path)
         try:
             stream = client.chat_completion(messages, model=model_name, stream=True, **settings)
             response = "".join(piece for chunk in stream if (piece := extract_chunk_content(chunk)))
@@ -172,8 +215,8 @@ def call_model(client: InferenceClient, system: str, user: str, is_python: bool,
             return response
         except Exception as e:
             last_exception = e
-            write_error_log(e, f"
-            time.sleep(1)
+            write_error_log(e, f"Streaming model {model_name} failed")
+            time.sleep(1)  # basic backoff and continue to next model
             continue
 
     logging.error(f"All models failed. Last error: {last_exception}")
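
For context, the updated extract_chunk_content accepts three chunk shapes: an OpenAI-style dict carrying choices[0]["delta"], an object exposing .delta.content, and a plain string. The following is a minimal, self-contained check of those branches; the helper body is copied from the diff above, while the sample chunks and the SimpleNamespace stand-in are illustrative assumptions rather than real InferenceClient output.

from types import SimpleNamespace
from typing import Any, Optional

def extract_chunk_content(chunk: Any) -> Optional[str]:
    # Copied from the updated app.py above.
    try:
        if isinstance(chunk, dict) and (choices := chunk.get("choices")):
            delta = choices[0].get("delta", {})
            # streaming chunk structure
            return delta.get("content") or delta.get("text")
        elif hasattr(chunk, 'delta') and hasattr(chunk.delta, 'content'):
            return chunk.delta.content
        # sometimes streaming yields strings directly
        if isinstance(chunk, str):
            return chunk
    except Exception:
        return None
    return None

if __name__ == "__main__":
    # Illustrative sample chunks (assumptions, not captured InferenceClient output).
    dict_chunk = {"choices": [{"delta": {"content": "Hello"}}]}           # OpenAI-style dict
    obj_chunk = SimpleNamespace(delta=SimpleNamespace(content=" world"))  # object with .delta.content
    str_chunk = "!"                                                       # raw string chunk

    assert extract_chunk_content(dict_chunk) == "Hello"
    assert extract_chunk_content(obj_chunk) == " world"
    assert extract_chunk_content(str_chunk) == "!"
    assert extract_chunk_content(None) is None  # unknown shapes fall through to None

Unknown shapes and any parsing error yield None, which is what lets the streaming join in call_model ("".join(piece for chunk in stream if (piece := extract_chunk_content(chunk)))) skip unusable chunks.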