Kai Izumoto committed
Update app.py
app.py CHANGED
@@ -149,15 +149,21 @@ def extract_chunk_content(chunk: Any) -> Optional[str]:
     try:
         if isinstance(chunk, dict) and (choices := chunk.get("choices")):
             delta = choices[0].get("delta", {})
-
+            # streaming chunk structure
+            return delta.get("content") or delta.get("text")
         elif hasattr(chunk, 'delta') and hasattr(chunk.delta, 'content'):
             return chunk.delta.content
+        # sometimes streaming yields strings directly
+        if isinstance(chunk, str):
+            return chunk
     except Exception:
         return None
     return None
 
 def call_model(client: InferenceClient, system: str, user: str, is_python: bool, **settings) -> str:
-    """Calls the appropriate LLM with retry logic and fallbacks.
+    """Calls the appropriate LLM with retry logic and fallbacks.
+    Tries non-streaming first (more reliable), falls back to streaming.
+    """
     primary_model = PYTHON_MODEL if is_python else OTHER_MODEL
     models_to_try = [primary_model] + FALLBACK_MODELS
 
@@ -165,6 +171,43 @@ def call_model(client: InferenceClient, system: str, user: str, is_python: bool,
 
     last_exception = None
     for model_name in models_to_try:
+        # First attempt: non-streaming call (more reliable across client versions/models)
+        try:
+            resp = client.chat_completion(messages, model=model_name, stream=False, **settings)
+            # resp can be dict-like or string; try multiple extraction methods
+            response_text = ""
+            try:
+                if isinstance(resp, dict):
+                    # Common HF shapes
+                    if "generated_text" in resp and isinstance(resp["generated_text"], str):
+                        response_text = resp["generated_text"]
+                    elif "text" in resp and isinstance(resp["text"], str):
+                        response_text = resp["text"]
+                    elif "choices" in resp and resp["choices"]:
+                        choice = resp["choices"][0]
+                        # choice may contain 'message' with 'content'
+                        if isinstance(choice, dict):
+                            if "message" in choice and isinstance(choice["message"], dict):
+                                response_text = choice["message"].get("content") or choice["message"].get("text", "") or ""
+                            else:
+                                response_text = choice.get("text") or choice.get("message") or ""
+                elif isinstance(resp, str):
+                    response_text = resp
+                else:
+                    # Fallback to string representation
+                    response_text = str(resp)
+            except Exception as e:
+                write_error_log(e, f"Non-stream parsing failed for model {model_name}")
+
+            if response_text and response_text.strip():
+                return response_text
+        except Exception as e:
+            # Save and try streaming fallback below
+            last_exception = e
+            write_error_log(e, f"Non-stream model {model_name} failed, attempting stream fallback")
+            # fall through to streaming attempt
+
+        # Streaming fallback (older code path)
         try:
             stream = client.chat_completion(messages, model=model_name, stream=True, **settings)
             response = "".join(piece for chunk in stream if (piece := extract_chunk_content(chunk)))
@@ -172,8 +215,8 @@ def call_model(client: InferenceClient, system: str, user: str, is_python: bool,
             return response
         except Exception as e:
             last_exception = e
-            write_error_log(e, f"
-            time.sleep(1)
+            write_error_log(e, f"Streaming model {model_name} failed")
+            time.sleep(1)  # basic backoff and continue to next model
             continue
 
     logging.error(f"All models failed. Last error: {last_exception}")
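
For context, the updated extract_chunk_content accepts three chunk shapes: an OpenAI-style dict carrying choices[0]["delta"], an object exposing .delta.content, and a plain string. The following is a minimal, self-contained check of those branches; the helper body is copied from the diff above, while the sample chunks and the SimpleNamespace stand-in are illustrative assumptions rather than real InferenceClient output.

from types import SimpleNamespace
from typing import Any, Optional

def extract_chunk_content(chunk: Any) -> Optional[str]:
    # Copied from the updated app.py above.
    try:
        if isinstance(chunk, dict) and (choices := chunk.get("choices")):
            delta = choices[0].get("delta", {})
            # streaming chunk structure
            return delta.get("content") or delta.get("text")
        elif hasattr(chunk, 'delta') and hasattr(chunk.delta, 'content'):
            return chunk.delta.content
        # sometimes streaming yields strings directly
        if isinstance(chunk, str):
            return chunk
    except Exception:
        return None
    return None

if __name__ == "__main__":
    # Illustrative sample chunks (assumptions, not captured InferenceClient output).
    dict_chunk = {"choices": [{"delta": {"content": "Hello"}}]}           # OpenAI-style dict
    obj_chunk = SimpleNamespace(delta=SimpleNamespace(content=" world"))  # object with .delta.content
    str_chunk = "!"                                                       # raw string chunk

    assert extract_chunk_content(dict_chunk) == "Hello"
    assert extract_chunk_content(obj_chunk) == " world"
    assert extract_chunk_content(str_chunk) == "!"
    assert extract_chunk_content(None) is None  # unknown shapes fall through to None

Unknown shapes and any parsing error yield None, which is what lets the streaming join in call_model ("".join(piece for chunk in stream if (piece := extract_chunk_content(chunk)))) skip unusable chunks.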