Spaces:

TrueDevSprint
/

InfinateCodeGenerator

Sleeping

App Files Files Community

Kai Izumoto committed on Oct 2, 2025

Commit

50552db

verified ·

1 Parent(s): 9dfa4a6

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -15

app.py CHANGED Viewed

@@ -22,6 +22,7 @@ from huggingface_hub import InferenceClient
 # Added missing imports
 import tempfile
 import zipfile
 # ---------- Config ----------
 PYTHON_MODEL = "Nxcode-CQ-7B-orpo"
@@ -229,9 +230,28 @@ def validate_files_dict(files: Dict[str, str]) -> bool:
         return False
     return all(isinstance(k, str) and isinstance(v, str) for k, v in files.items())
 def parse_meta(text: str) -> Optional[Dict[str, Any]]:
-    """Parses model output to extract code files, trying structured JSON first, then falling back to heuristics."""
-    # Strict JSON/META block parsing
     for pattern in [r"```json\s*(.*?)```", r"```meta\s*(.*?)```"]:
         match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
         if match:
@@ -242,23 +262,46 @@ def parse_meta(text: str) -> Optional[Dict[str, Any]]:
                     return parsed
             except (json.JSONDecodeError, TypeError):
                 continue
-    # Fallback to heuristic parsing of code blocks
     files = {}
     code_blocks = re.findall(r"```(?:\w+)?\s*([\s\S]*?)```", text, re.DOTALL)
-    if not code_blocks:
-        return None
-    # Try to find filenames from comments or preceding lines
-    # This is a simplified heuristic; more advanced logic could be added
-    potential_filenames = re.findall(r'#\s*File:\s*([\w/._-]+)', text)
-    for i, block in enumerate(code_blocks):
-        filename = potential_filenames[i] if i < len(potential_filenames) else f"file_{i+1}.py"
-        files[filename] = block.strip()
-    if validate_files_dict(files):
-        return {"files": files, "changelog": "Extracted files via heuristic parsing."}
     return None
 # ---------- Enhanced evaluators ----------
@@ -465,6 +508,8 @@ Return the perfected code in META format."""
             meta = parse_meta(response)
             if not meta or not meta.get("files"):
                 return {"success": False, "warning": "Parse failed - keeping previous code"}
             files = meta["files"]

 # Added missing imports
 import tempfile
 import zipfile
+import ast
 # ---------- Config ----------
 PYTHON_MODEL = "Nxcode-CQ-7B-orpo"
         return False
     return all(isinstance(k, str) and isinstance(v, str) for k, v in files.items())
+def _try_load_json_candidate(candidate: str) -> Optional[Dict[str, Any]]:
+    """Attempt to parse a JSON or Python-dict-like candidate string."""
+    try:
+        return json.loads(candidate)  # strict JSON first; may yield any JSON value, not only a dict
+    except Exception:
+        try:
+            # Fallback for Python-literal syntax that strict JSON rejects (e.g. single-quoted strings); literal_eval only evaluates literals
+            return ast.literal_eval(candidate)  # may likewise return a non-dict literal; parse_meta checks isinstance(parsed, dict)
+        except Exception:
+            return None  # neither parser accepted the text
 def parse_meta(text: str) -> Optional[Dict[str, Any]]:
+    """Parses model output to extract code files, trying structured JSON first, then falling back to heuristics.
+    Improvements:
+    - Accept JSON inside fenced blocks, fenced meta blocks
+    - If that fails, scan the entire text for balanced {...} and try json.loads() or ast.literal_eval()
+    - Fallback to code-block heuristic as before
+    """
+    if not text:
+        return None
+    # 1) Strict JSON/META block parsing (existing behavior)
     for pattern in [r"```json\s*(.*?)```", r"```meta\s*(.*?)```"]:
         match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
         if match:
                     return parsed
             except (json.JSONDecodeError, TypeError):
                 continue
+    # 2) Any code fence content that looks like JSON
+    # look for fenced blocks and attempt to parse their contents
+    for match in re.finditer(r"```(?:\w+)?\s*([\s\S]*?)```", text, re.DOTALL):
+        content = match.group(1).strip()
+        parsed = _try_load_json_candidate(content)
+        if isinstance(parsed, dict) and "files" in parsed and validate_files_dict(parsed["files"]):
+            return parsed
+    # 3) Scan the entire text for balanced JSON-like objects and try loads/literal_eval
+    # We'll scan for every '{' and try to find its matching '}' and parse the substring.
+    for m in re.finditer(r"\{", text):
+        start = m.start()
+        depth = 0
+        for i in range(start, len(text)):
+            ch = text[i]
+            if ch == "{":
+                depth += 1
+            elif ch == "}":
+                depth -= 1
+                if depth == 0:
+                    candidate = text[start:i+1]
+                    parsed = _try_load_json_candidate(candidate)
+                    if isinstance(parsed, dict) and "files" in parsed and validate_files_dict(parsed["files"]):
+                        return parsed
+                    break  # stop this candidate and continue scanning
+    # 4) Fallback to heuristic parsing of code blocks (original behavior)
     files = {}
     code_blocks = re.findall(r"```(?:\w+)?\s*([\s\S]*?)```", text, re.DOTALL)
+    if code_blocks:
+        potential_filenames = re.findall(r'#\s*File:\s*([\w/._-]+)', text)
+        for i, block in enumerate(code_blocks):
+            filename = potential_filenames[i] if i < len(potential_filenames) else f"file_{i+1}.py"
+            files[filename] = block.strip()
+        if validate_files_dict(files):
+            return {"files": files, "changelog": "Extracted files via heuristic parsing."}
+    # If nothing matched, return None
     return None
 # ---------- Enhanced evaluators ----------
             meta = parse_meta(response)
             if not meta or not meta.get("files"):
+                # include a sanitized snippet of response for debugging in logs (safe)
+                logging.warning("Parse failed. Model response (sanitized): %s", sanitize_log_message(response[:200]))
                 return {"success": False, "warning": "Parse failed - keeping previous code"}
             files = meta["files"]