Kai Izumoto committed on
Commit
50552db
·
verified ·
1 Parent(s): 9dfa4a6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -15
app.py CHANGED
@@ -22,6 +22,7 @@ from huggingface_hub import InferenceClient
22
  # Added missing imports
23
  import tempfile
24
  import zipfile
 
25
 
26
  # ---------- Config ----------
27
  PYTHON_MODEL = "Nxcode-CQ-7B-orpo"
@@ -229,9 +230,28 @@ def validate_files_dict(files: Dict[str, str]) -> bool:
229
  return False
230
  return all(isinstance(k, str) and isinstance(v, str) for k, v in files.items())
231
 
 
 
 
 
 
 
 
 
 
 
 
232
  def parse_meta(text: str) -> Optional[Dict[str, Any]]:
233
- """Parses model output to extract code files, trying structured JSON first, then falling back to heuristics."""
234
- # Strict JSON/META block parsing
 
 
 
 
 
 
 
 
235
  for pattern in [r"```json\s*(.*?)```", r"```meta\s*(.*?)```"]:
236
  match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
237
  if match:
@@ -242,23 +262,46 @@ def parse_meta(text: str) -> Optional[Dict[str, Any]]:
242
  return parsed
243
  except (json.JSONDecodeError, TypeError):
244
  continue
245
-
246
- # Fallback to heuristic parsing of code blocks
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  files = {}
248
  code_blocks = re.findall(r"```(?:\w+)?\s*([\s\S]*?)```", text, re.DOTALL)
249
- if not code_blocks:
250
- return None
 
 
 
251
 
252
- # Try to find filenames from comments or preceding lines
253
- # This is a simplified heuristic; more advanced logic could be added
254
- potential_filenames = re.findall(r'#\s*File:\s*([\w/._-]+)', text)
255
- for i, block in enumerate(code_blocks):
256
- filename = potential_filenames[i] if i < len(potential_filenames) else f"file_{i+1}.py"
257
- files[filename] = block.strip()
258
 
259
- if validate_files_dict(files):
260
- return {"files": files, "changelog": "Extracted files via heuristic parsing."}
261
-
262
  return None
263
 
264
  # ---------- Enhanced evaluators ----------
@@ -465,6 +508,8 @@ Return the perfected code in META format."""
465
 
466
  meta = parse_meta(response)
467
  if not meta or not meta.get("files"):
 
 
468
  return {"success": False, "warning": "Parse failed - keeping previous code"}
469
 
470
  files = meta["files"]
 
22
  # Added missing imports
23
  import tempfile
24
  import zipfile
25
+ import ast
26
 
27
  # ---------- Config ----------
28
  PYTHON_MODEL = "Nxcode-CQ-7B-orpo"
 
230
  return False
231
  return all(isinstance(k, str) and isinstance(v, str) for k, v in files.items())
232
 
233
+ def _try_load_json_candidate(candidate: str) -> Optional[Dict[str, Any]]:
234
+ """Attempt to parse a JSON or Python-dict-like candidate string."""
235
+ try:
236
+ return json.loads(candidate)
237
+ except Exception:
238
+ try:
239
+ # Some models emit Python dict syntax — try literal_eval
240
+ return ast.literal_eval(candidate)
241
+ except Exception:
242
+ return None
243
+
244
  def parse_meta(text: str) -> Optional[Dict[str, Any]]:
245
+ """Parses model output to extract code files, trying structured JSON first, then falling back to heuristics.
246
+ Improvements:
247
+ - Accept JSON inside fenced blocks, fenced meta blocks
248
+ - If that fails, scan the entire text for balanced {...} and try json.loads() or ast.literal_eval()
249
+ - Fallback to code-block heuristic as before
250
+ """
251
+ if not text:
252
+ return None
253
+
254
+ # 1) Strict JSON/META block parsing (existing behavior)
255
  for pattern in [r"```json\s*(.*?)```", r"```meta\s*(.*?)```"]:
256
  match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
257
  if match:
 
262
  return parsed
263
  except (json.JSONDecodeError, TypeError):
264
  continue
265
+
266
+ # 2) Any code fence content that looks like JSON
267
+ # look for fenced blocks and attempt to parse their contents
268
+ for match in re.finditer(r"```(?:\w+)?\s*([\s\S]*?)```", text, re.DOTALL):
269
+ content = match.group(1).strip()
270
+ parsed = _try_load_json_candidate(content)
271
+ if isinstance(parsed, dict) and "files" in parsed and validate_files_dict(parsed["files"]):
272
+ return parsed
273
+
274
+ # 3) Scan the entire text for balanced JSON-like objects and try loads/literal_eval
275
+ # We'll scan for every '{' and try to find its matching '}' and parse the substring.
276
+ for m in re.finditer(r"\{", text):
277
+ start = m.start()
278
+ depth = 0
279
+ for i in range(start, len(text)):
280
+ ch = text[i]
281
+ if ch == "{":
282
+ depth += 1
283
+ elif ch == "}":
284
+ depth -= 1
285
+ if depth == 0:
286
+ candidate = text[start:i+1]
287
+ parsed = _try_load_json_candidate(candidate)
288
+ if isinstance(parsed, dict) and "files" in parsed and validate_files_dict(parsed["files"]):
289
+ return parsed
290
+ break # stop this candidate and continue scanning
291
+
292
+ # 4) Fallback to heuristic parsing of code blocks (original behavior)
293
  files = {}
294
  code_blocks = re.findall(r"```(?:\w+)?\s*([\s\S]*?)```", text, re.DOTALL)
295
+ if code_blocks:
296
+ potential_filenames = re.findall(r'#\s*File:\s*([\w/._-]+)', text)
297
+ for i, block in enumerate(code_blocks):
298
+ filename = potential_filenames[i] if i < len(potential_filenames) else f"file_{i+1}.py"
299
+ files[filename] = block.strip()
300
 
301
+ if validate_files_dict(files):
302
+ return {"files": files, "changelog": "Extracted files via heuristic parsing."}
 
 
 
 
303
 
304
+ # If nothing matched, return None
 
 
305
  return None
306
 
307
  # ---------- Enhanced evaluators ----------
 
508
 
509
  meta = parse_meta(response)
510
  if not meta or not meta.get("files"):
511
+ # include a sanitized snippet of response for debugging in logs (safe)
512
+ logging.warning("Parse failed. Model response (sanitized): %s", sanitize_log_message(response[:200]))
513
  return {"success": False, "warning": "Parse failed - keeping previous code"}
514
 
515
  files = meta["files"]