Kai Izumoto committed
Commit 9dfa4a6 · verified · 1 Parent(s): bf1cbe5

Update app.py

Files changed (1): app.py +47 -4
app.py CHANGED
@@ -149,15 +149,21 @@ def extract_chunk_content(chunk: Any) -> Optional[str]:
     try:
         if isinstance(chunk, dict) and (choices := chunk.get("choices")):
             delta = choices[0].get("delta", {})
-            return delta.get("content")
+            # streaming chunk structure
+            return delta.get("content") or delta.get("text")
         elif hasattr(chunk, 'delta') and hasattr(chunk.delta, 'content'):
             return chunk.delta.content
+        # sometimes streaming yields strings directly
+        if isinstance(chunk, str):
+            return chunk
     except Exception:
         return None
     return None

 def call_model(client: InferenceClient, system: str, user: str, is_python: bool, **settings) -> str:
-    """Calls the appropriate LLM with retry logic and fallbacks."""
+    """Calls the appropriate LLM with retry logic and fallbacks.
+    Tries non-streaming first (more reliable), falls back to streaming.
+    """
     primary_model = PYTHON_MODEL if is_python else OTHER_MODEL
     models_to_try = [primary_model] + FALLBACK_MODELS

@@ -165,6 +171,43 @@ def call_model(client: InferenceClient, system: str, user: str, is_python: bool,

     last_exception = None
     for model_name in models_to_try:
+        # First attempt: non-streaming call (more reliable across client versions/models)
+        try:
+            resp = client.chat_completion(messages, model=model_name, stream=False, **settings)
+            # resp can be dict-like or string; try multiple extraction methods
+            response_text = ""
+            try:
+                if isinstance(resp, dict):
+                    # Common HF shapes
+                    if "generated_text" in resp and isinstance(resp["generated_text"], str):
+                        response_text = resp["generated_text"]
+                    elif "text" in resp and isinstance(resp["text"], str):
+                        response_text = resp["text"]
+                    elif "choices" in resp and resp["choices"]:
+                        choice = resp["choices"][0]
+                        # choice may contain 'message' with 'content'
+                        if isinstance(choice, dict):
+                            if "message" in choice and isinstance(choice["message"], dict):
+                                response_text = choice["message"].get("content") or choice["message"].get("text", "") or ""
+                            else:
+                                response_text = choice.get("text") or choice.get("message") or ""
+                elif isinstance(resp, str):
+                    response_text = resp
+                else:
+                    # Fallback to string representation
+                    response_text = str(resp)
+            except Exception as e:
+                write_error_log(e, f"Non-stream parsing failed for model {model_name}")
+
+            if response_text and response_text.strip():
+                return response_text
+        except Exception as e:
+            # Save and try streaming fallback below
+            last_exception = e
+            write_error_log(e, f"Non-stream model {model_name} failed, attempting stream fallback")
+            # fall through to streaming attempt
+
+        # Streaming fallback (older code path)
         try:
             stream = client.chat_completion(messages, model=model_name, stream=True, **settings)
             response = "".join(piece for chunk in stream if (piece := extract_chunk_content(chunk)))
@@ -172,8 +215,8 @@ def call_model(client: InferenceClient, system: str, user: str, is_python: bool,
             return response
         except Exception as e:
             last_exception = e
-            write_error_log(e, f"Model {model_name} failed")
-            time.sleep(1)  # Simple backoff
+            write_error_log(e, f"Streaming model {model_name} failed")
+            time.sleep(1)  # basic backoff and continue to next model
             continue

     logging.error(f"All models failed. Last error: {last_exception}")
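
For reference, the non-stream-first pattern this commit introduces can be exercised in isolation. Below is a minimal sketch, assuming huggingface_hub's InferenceClient; the model id, the max_tokens value, and the ask() helper are illustrative placeholders, not part of this commit. With stream=False, chat_completion returns an object whose text lives at choices[0].message.content; with stream=True it yields chunks carrying choices[0].delta.content:

from huggingface_hub import InferenceClient

def ask(client: InferenceClient, model: str, messages: list) -> str:
    # Attempt 1: non-streaming; the whole reply arrives as one object.
    try:
        out = client.chat_completion(messages, model=model, stream=False, max_tokens=256)
        text = out.choices[0].message.content or ""
        if text.strip():
            return text
    except Exception:
        pass  # fall through to the streaming attempt, as app.py now does

    # Attempt 2: streaming; concatenate the delta carried by each chunk.
    pieces = []
    for chunk in client.chat_completion(messages, model=model, stream=True, max_tokens=256):
        delta = chunk.choices[0].delta.content
        if delta:  # the final chunk's delta may be None
            pieces.append(delta)
    return "".join(pieces)

client = InferenceClient()  # reads HF_TOKEN from the environment if set
print(ask(client, "HuggingFaceH4/zephyr-7b-beta", [{"role": "user", "content": "Hello!"}]))

Either path returns plain text, which is why call_model can treat the two attempts interchangeably and only moves on to the next entry in models_to_try when both fail for the current model.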