Kai Izumoto committed on
Update app.py

app.py CHANGED
@@ -1,6 +1,6 @@
+# InfinateCodeGenerator - Ultimate Merged Edition (v1.0.1) - patched
 """
-
-Consolidated, hardened, and production-ready version.
+Consolidated, hardened, and production-ready version (patched call_model & retries).
 """
 import os
 import sys
@@ -149,12 +149,20 @@ def extract_chunk_content(chunk: Any) -> Optional[str]:
     """Extracts content from various possible streaming chunk formats."""
     try:
         if isinstance(chunk, dict) and (choices := chunk.get("choices")):
+            # typical OpenAI-like streaming chunk shape
             delta = choices[0].get("delta", {})
-            # streaming chunk structure
             return delta.get("content") or delta.get("text")
-
+        # HF newer shapes may use 'generations' inside chunk
+        if isinstance(chunk, dict) and "generations" in chunk:
+            gens = chunk.get("generations") or []
+            parts = []
+            for g in gens:
+                if isinstance(g, dict) and "text" in g:
+                    parts.append(g["text"])
+            return "".join(parts) if parts else None
+        # some streaming yields objects with .delta or .content attributes
+        if hasattr(chunk, 'delta') and hasattr(chunk.delta, 'content'):
             return chunk.delta.content
-        # sometimes streaming yields strings directly
         if isinstance(chunk, str):
             return chunk
     except Exception:
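Note: a quick sketch of the chunk shapes the updated extractor now handles, using illustrative payloads rather than real API output:

    # Hypothetical chunks mirroring the three dict/str branches above.
    extract_chunk_content({"choices": [{"delta": {"content": "hi"}}]})      # -> "hi"
    extract_chunk_content({"generations": [{"text": "a"}, {"text": "b"}]})  # -> "ab"
    extract_chunk_content("raw string chunk")                               # -> "raw string chunk"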
@@ -162,86 +170,167 @@ def extract_chunk_content(chunk: Any) -> Optional[str]:
     return None

 def call_model(client: InferenceClient, system: str, user: str, is_python: bool, **settings) -> str:
-    """Calls the appropriate LLM with retry logic and fallbacks.
+    """Calls the appropriate LLM with retry logic and multiple fallbacks.
     Tries non-streaming first (more reliable), falls back to streaming.
     """
+    if client is None:
+        return "<<ERROR: No inference client provided>>"
+
     primary_model = PYTHON_MODEL if is_python else OTHER_MODEL
-    models_to_try = [primary_model] + FALLBACK_MODELS
-
+    models_to_try = [primary_model] + [m for m in FALLBACK_MODELS if m != primary_model]
+
     logging.info(f"Calling model for {'Python' if is_python else 'Other'} project. Primary: {primary_model}")
-    logging.
-
+    logging.debug(f"Raw settings: {settings}")
+
     messages = [{"role": "system", "content": system}, {"role": "user", "content": user}]
-
-    #
-
-
-
-
-
-
-
-
+
+    # Build robust settings: include both keys some API variants accept
+    cleaned = {}
+    cleaned["temperature"] = settings.get("temperature", DEFAULT_TEMPERATURE)
+    cleaned["top_p"] = settings.get("top_p", DEFAULT_TOP_P)
+    max_new = settings.get("max_new_tokens", settings.get("max_tokens", DEFAULT_MAX_TOKENS))
+    try:
+        max_new = int(max_new)
+    except Exception:
+        max_new = DEFAULT_MAX_TOKENS
+    cleaned["max_new_tokens"] = max_new
+    # also include max_tokens for API variants
+    cleaned["max_tokens"] = max_new
+
+    logging.info(f"Using cleaned settings: temperature={cleaned['temperature']}, top_p={cleaned['top_p']}, max_new_tokens={cleaned['max_new_tokens']}")
+
     last_exception = None
+
     for model_name in models_to_try:
-
-        try
-
-
-        # resp can be dict-like or string; try multiple extraction methods
-        response_text = ""
+        attempt = 0
+        # try a couple of times per model with decreasing tokens if necessary
+        while attempt < 3:
+            attempt += 1
             try:
-
-
-
-
-
-
-
-
-
-
-
-
+                logging.info(f"Attempting non-streaming call to {model_name} (attempt {attempt})")
+                # Try named-argument style first (most robust)
+                try:
+                    resp = client.chat_completion(messages=messages, model=model_name, stream=False, **cleaned)
+                except TypeError as te:
+                    # Some client versions expect different parameter names - try a second shape
+                    logging.debug(f"TypeError calling chat_completion: {te}")
+                    try:
+                        resp = client.chat_completion(messages=messages, model=model_name, **cleaned)
+                    except Exception as e:
+                        raise
+                except Exception as e:
+                    # bubble up to outer exception handling
+                    raise
+
+                response_text = ""
+                # Parse many possible shapes
+                try:
+                    if isinstance(resp, dict):
+                        # common HF shapes
+                        if "generated_text" in resp and isinstance(resp["generated_text"], str):
+                            response_text = resp["generated_text"]
+                        elif "text" in resp and isinstance(resp["text"], str):
+                            response_text = resp["text"]
+                        elif "choices" in resp and resp["choices"]:
+                            choice = resp["choices"][0]
+                            if isinstance(choice, dict):
+                                if "message" in choice and isinstance(choice["message"], dict):
+                                    response_text = choice["message"].get("content") or choice["message"].get("text", "") or ""
+                                else:
+                                    response_text = choice.get("text") or choice.get("message") or ""
                             else:
-                response_text =
-
-
+                                response_text = str(choice)
+                        elif "generations" in resp and resp["generations"]:
+                            gens = resp["generations"]
+                            parts = []
+                            for g in gens:
+                                if isinstance(g, dict) and "text" in g:
+                                    parts.append(g.get("text", ""))
+                                elif hasattr(g, "text"):
+                                    parts.append(getattr(g, "text"))
+                            response_text = "".join(parts)
+                        else:
+                            # fallback: inspect nested keys
+                            if "data" in resp and isinstance(resp["data"], list) and resp["data"]:
+                                # e.g., {'data':[{'text': '...'}]}
+                                first = resp["data"][0]
+                                if isinstance(first, dict) and "text" in first:
+                                    response_text = first["text"]
+                    elif isinstance(resp, (list, tuple)):
+                        # maybe list of generation dicts
+                        parts = []
+                        for item in resp:
+                            if isinstance(item, dict) and "text" in item:
+                                parts.append(item["text"])
+                            else:
+                                parts.append(str(item))
+                        response_text = "".join(parts)
+                    elif isinstance(resp, str):
+                        response_text = resp
+                    else:
+                        # last resort: str()
+                        response_text = str(resp)
+                except Exception as e:
+                    write_error_log(e, f"Non-stream parsing failed for model {model_name}")
+                    response_text = ""
+
+                if response_text and response_text.strip():
+                    logging.info(f"✓ Successfully got response from {model_name} ({len(response_text)} chars)")
+                    return response_text
                 else:
-
-
+                    logging.warning(f"Non-streaming returned empty response from {model_name}, attempt {attempt}.")
+                    # fall through to streaming fallback below
             except Exception as e:
-
+                last_exception = e
+                write_error_log(e, f"Non-stream model {model_name} failed on attempt {attempt}")
+                logging.error(f"Non-stream error for {model_name}: {str(e)[:200]}")

-
-
-
-
-
-
-
-
-
-
-
+            # Streaming fallback
+            try:
+                logging.info(f"Attempting streaming call to {model_name} (attempt {attempt})")
+                # streaming - some versions yield objects, some strings
+                try:
+                    stream_iter = client.chat_completion(messages=messages, model=model_name, stream=True, **cleaned)
+                except TypeError:
+                    # Try alternate call-signature
+                    stream_iter = client.chat_completion(messages=messages, model=model_name, stream=True)
+                except Exception as e:
+                    raise
+
+                collected = []
+                try:
+                    for chunk in stream_iter:
+                        piece = extract_chunk_content(chunk)
+                        if piece:
+                            collected.append(piece)
+                    response = "".join(collected).strip()
+                except Exception as e:
+                    # some streaming iterables need to be exhausted differently; safely cast to string
+                    write_error_log(e, "Streaming parsing failed")
+                    response = ""
+                if response:
+                    logging.info(f"✓ Successfully got streaming response from {model_name} ({len(response)} chars)")
+                    return response
+                else:
+                    logging.warning(f"Streaming returned empty response from {model_name} (attempt {attempt})")
+            except Exception as e:
+                last_exception = e
+                write_error_log(e, f"Streaming model {model_name} failed on attempt {attempt}")
+                logging.error(f"Streaming error for {model_name}: {str(e)[:200]}")
+                # reduce tokens and retry
+                time.sleep(1 + attempt * 0.5)
+                # reduce token budget to try avoid model refusing or failing
+                cleaned["max_new_tokens"] = max(256, int(cleaned["max_new_tokens"] * 0.5))
+                cleaned["max_tokens"] = cleaned["max_new_tokens"]
+                logging.info(f"Reduced max_new_tokens to {cleaned['max_new_tokens']} and retrying")
+                continue
+
+            # if reached here (no response), reduce tokens and retry
+            cleaned["max_new_tokens"] = max(256, int(cleaned["max_new_tokens"] * 0.6))
+            cleaned["max_tokens"] = cleaned["max_new_tokens"]
+            logging.info(f"No response; reduced max_new_tokens to {cleaned['max_new_tokens']} and will retry (attempt {attempt})")
+            time.sleep(0.8 + attempt * 0.3)

-        # Streaming fallback (older code path)
-        try:
-            logging.info(f"Attempting streaming call to {model_name}")
-            stream = client.chat_completion(messages, model=model_name, stream=True, **valid_settings)
-            response = "".join(piece for chunk in stream if (piece := extract_chunk_content(chunk)))
-            if response.strip():
-                logging.info(f"✓ Successfully got streaming response from {model_name} ({len(response)} chars)")
-                return response
-            else:
-                logging.warning(f"Streaming returned empty response from {model_name}")
-        except Exception as e:
-            last_exception = e
-            write_error_log(e, f"Streaming model {model_name} failed")
-            logging.error(f"Streaming error for {model_name}: {str(e)[:200]}")
-            time.sleep(1)  # basic backoff and continue to next model
-            continue
-
     logging.error(f"❌ ALL MODELS FAILED. Last error: {last_exception}")
     return f"<<ERROR: All models failed. Last error: {sanitize_log_message(str(last_exception))}>>"

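Note: the retry policy above halves the token budget (floor 256) after a streaming error and scales it by 0.6 when a call simply returns nothing, so each model gets up to three progressively cheaper attempts. A standalone sketch of the empty-response decay, assuming a hypothetical 4096-token starting budget:

    budget = 4096  # illustrative starting max_new_tokens
    for attempt in range(1, 4):
        budget = max(256, int(budget * 0.6))
        print(f"attempt {attempt}: next budget = {budget}")
    # attempt 1: 2457, attempt 2: 1474, attempt 3: 884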
@@ -254,8 +343,11 @@ def validate_files_dict(files: Dict[str, str]) -> bool:

 def parse_meta(text: str) -> Optional[Dict[str, Any]]:
     """Parses model output to extract code files, trying structured JSON first, then falling back to heuristics."""
+    if not text or not isinstance(text, str):
+        return None
+
     # Strict JSON/META block parsing
-    for pattern in [r"```json\s*(.*?)```", r"```meta\s*(.*?)```", r"```META\s*(.*?)```"]:
+    for pattern in [r"```json\s*(.*?)```", r"```meta\s*(.*?)```", r"```META\s*(.*?)```", r"<META>(.*?)</META>"]:
         match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
         if match:
             try:
@@ -267,24 +359,40 @@ def parse_meta(text: str) -> Optional[Dict[str, Any]]:
         except (json.JSONDecodeError, TypeError) as e:
             logging.warning(f"JSON parse failed: {e}")
             continue
-
+
+    # Also try to detect a top-level JSON blob
+    try:
+        parsed_full = json.loads(text.strip())
+        if isinstance(parsed_full, dict) and "files" in parsed_full and validate_files_dict(parsed_full["files"]):
+            logging.info("Parsed raw JSON response as META")
+            return parsed_full
+    except Exception:
+        pass
+
     # Fallback to heuristic parsing of code blocks
     files = {}
-
+
     # Try to find filename markers before code blocks
-    # Patterns like: # File: main.py or ## main.py or **main.py**
     filename_patterns = [
         r'#\s*[Ff]ile:\s*([\w/._-]+\.[\w]+)',
         r'##\s*([\w/._-]+\.[\w]+)',
-        r'\*\*\s*([\w/._-]+\.[\w]+)\s*\*\*'
+        r'\*\*\s*([\w/._-]+\.[\w]+)\s*\*\*',
+        r'^\s*([\w\-/_.]+?\.(?:py|txt|md|json|yaml|yml))\s*:\s*$',  # e.g., "main.py:" on its own line
     ]
-
+
     all_filenames = []
     for pattern in filename_patterns:
-        all_filenames.extend(re.findall(pattern, text))
-
-
-
+        all_filenames.extend(re.findall(pattern, text, flags=re.MULTILINE))
+
+    # Grab all fenced code blocks
+    code_blocks = re.findall(r"```(?:[\w+-]+)?\s*([\s\S]*?)```", text, re.DOTALL)
+
+    # Also capture indented/code-block-like sections (fallback)
+    if not code_blocks:
+        # naive: split by two or more newlines and keep blocks that look like code
+        chunks = [c for c in re.split(r"\n{2,}", text) if len(c.splitlines()) > 1]
+        code_blocks = chunks[:6]  # limit
+
     if not code_blocks:
         logging.warning("No code blocks found in model response")
         return None
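Note: the strict path expects a fenced json/meta/META block (or, new here, a <META>...</META> tag or a bare top-level JSON blob) whose payload maps filenames to contents. An illustrative input, assuming the elided try-body json-loads the captured group:

    payload = '```json\n{"files": {"main.py": "print(1)"}, "changelog": "init"}\n```'
    parse_meta(payload)  # -> {"files": {"main.py": "print(1)"}, "changelog": "init"}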
@@ -294,41 +402,49 @@ def parse_meta(text: str) -> Optional[Dict[str, Any]]:
         block_content = block.strip()
         if not block_content:
             continue
-
+
         if i < len(all_filenames):
             filename = all_filenames[i]
         else:
             # Guess filename based on content
             if "def test_" in block_content or "import pytest" in block_content:
-                filename = f"test_main.py"
+                filename = f"tests/test_main.py" if not block_content.startswith("test_") else f"{block_content.splitlines()[0][:50]}.py"
             elif "requirements" in text.lower() and i == 0:
                 filename = "requirements.txt"
-            elif "# README" in block_content or "
+            elif "# README" in block_content or block_content.startswith("# ") or block_content.lower().strip().startswith("readme"):
                 filename = "README.md"
             else:
                 filename = f"main.py" if i == 0 else f"file_{i}.py"
-
+
+        # ensure relative path safe
         files[filename] = block_content

     if validate_files_dict(files) and files:
         logging.info(f"Heuristic parsing extracted {len(files)} files: {list(files.keys())}")
         return {"files": files, "changelog": "Extracted files via heuristic parsing."}
-
+
+    # As a last resort, if the whole output looks like a single file, place it into main.py
+    if text.strip():
+        files = {"main.py": text.strip()}
+        if validate_files_dict(files):
+            logging.info("Parsed whole response into main.py as last resort")
+            return {"files": files, "changelog": "Fallback single-file parse."}
+
     logging.error("Failed to extract any valid files from model response")
     return None

 # ---------- Enhanced evaluators ----------
 def run_evaluators(workdir: Path) -> Dict[str, Any]:
     out = {}
-
+
     rc, txt = run_cmd([sys.executable, "-m", "flake8", ".", "--count", "--max-line-length=100"], cwd=str(workdir))
     out["flake8_pass"] = rc == 0
     out["flake8_out"] = txt
-
+
     rc, txt = run_cmd([sys.executable, "-m", "bandit", "-r", ".", "-f", "txt"], cwd=str(workdir))
     out["bandit_pass"] = rc == 0 or "No issues" in txt
     out["bandit_out"] = txt
-
+
     test_files = list(workdir.glob("**/test_*.py")) + list(workdir.glob("**/*_test.py"))
     if test_files:
         rc, txt = run_cmd([sys.executable, "-m", "pytest", "--maxfail=1", "--tb=short"], cwd=str(workdir))
@@ -336,10 +452,10 @@ def run_evaluators(workdir: Path) -> Dict[str, Any]:
     else:
         out["pytest_pass"] = False
         out["pytest_out"] = txt if test_files else "No tests"
-
+
     rc, txt = run_cmd([sys.executable, "-m", "black", "--check", "."], cwd=str(workdir))
     out["black_pass"] = rc == 0
-
+
     complexity = 5.0
     rc, txt = run_cmd([sys.executable, "-m", "radon", "cc", ".", "-s", "-a"], cwd=str(workdir))
     if rc == 0:
@@ -350,16 +466,16 @@ def run_evaluators(workdir: Path) -> Dict[str, Any]:
         except:
             pass
     out["complexity"] = complexity
-
+
     # Calculate weighted score
     style = 100.0 if (out["flake8_pass"] and out["black_pass"]) else 50.0
     security = 100.0 if out["bandit_pass"] else 30.0
     tests = 100.0 if out["pytest_pass"] else 20.0
     maintainability = max(0.0, 100.0 - (complexity - 5.0) * 10.0) if complexity > 5 else 100.0
-
+
     w = EVAL_WEIGHTS
     score = w["style"] * style + w["security"] * security + w["tests"] * tests + w["maintainability"] * maintainability
-
+
     out["quality_score"] = round(max(0.0, min(100.0, score)), 1)
     out["breakdown"] = {
         "style": round(style, 1),
@@ -367,7 +483,7 @@ def run_evaluators(workdir: Path) -> Dict[str, Any]:
         "tests": round(tests, 1),
         "maintainability": round(maintainability, 1)
     }
-
+
     return out

 # ---------- AI features ----------
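Note: a worked example of the weighted score, assuming illustrative weights EVAL_WEIGHTS = {"style": 0.3, "security": 0.3, "tests": 0.2, "maintainability": 0.2} (the actual weights are defined earlier in app.py and are not shown in this diff):

    style, security, tests = 100.0, 100.0, 20.0              # lint/format and bandit pass, pytest fails
    maintainability = max(0.0, 100.0 - (7.0 - 5.0) * 10.0)   # radon average complexity 7.0 -> 80.0
    score = 0.3 * style + 0.3 * security + 0.2 * tests + 0.2 * maintainability
    round(max(0.0, min(100.0, score)), 1)                    # -> 80.0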
@@ -380,7 +496,7 @@ def generate_code_review(client: Optional[InferenceClient], token: str, files: D
 Quality: Flake8={'Pass' if eval_results.get('flake8_pass') else 'Fail'}, Tests={'Pass' if eval_results.get('pytest_pass') else 'Fail'}

 Give 2-3 specific, actionable improvements:"""
-    review = call_model(client, "You are a senior code reviewer.", prompt, is_python,
+    review = call_model(client, "You are a senior code reviewer.", prompt, is_python, max_new_tokens=400, temperature=0.2, top_p=0.8)
     return review if review and "<<ERROR" not in review else "No review"

 def generate_readme(client: Optional[InferenceClient], token: str, goal: str, files: Dict[str, str], is_python: bool) -> str:
@@ -392,7 +508,7 @@ Files:
 {summary}

 Include: description, installation, usage."""
-    readme = call_model(client, "You are a technical writer.", prompt, is_python,
+    readme = call_model(client, "You are a technical writer.", prompt, is_python, max_new_tokens=600, temperature=0.2, top_p=0.9)
     return readme if readme and "<<ERROR" not in readme else "# Project\n\nGenerated code."

 def create_initial_scaffold(client: Optional[InferenceClient], token: str, goal: str, is_python: bool) -> Optional[Dict[str, Any]]:
@@ -406,12 +522,15 @@ Create Version 0.1 scaffold:

 Return as META JSON with files mapping."""
     try:
-        response = call_model(client, system, prompt, is_python,
-        if "<<ERROR" in response:
-
-
-
-
+        response = call_model(client, system, prompt, is_python, max_new_tokens=3072, temperature=0.4)
+        if response and "<<ERROR" not in response:
+            meta = parse_meta(response)
+            if meta and meta.get("files") and validate_files_dict(meta["files"]):
+                return meta
+            else:
+                # Save raw scaffold response for debugging
+                with open("/tmp/failed_scaffold_response.txt", "w") as f:
+                    f.write(response)
     except Exception as e:
         write_error_log(e, "Scaffold failed")
     return None
@@ -423,7 +542,7 @@ def import_project(zip_file) -> Dict[str, str]:
     files = {}
     with zipfile.ZipFile(zip_file.name, 'r') as zf:
         for filename in zf.namelist():
-            if filename.endswith(('.py', '.txt', '.md', '.json', '.yaml')):
+            if filename.endswith(('.py', '.txt', '.md', '.json', '.yaml', '.yml')):
                 try:
                     content = zf.read(filename).decode('utf-8')
                     files[filename] = content
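Note: a small round-trip showing the importer's extension filter (which now also accepts .yml), built on an in-memory zip; the file names here are made up:

    import io, zipfile
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w") as zf:
        zf.writestr("app.py", "print('x')")
        zf.writestr("config.yml", "key: value")
        zf.writestr("logo.png", "not text")
    with zipfile.ZipFile(buf) as zf:
        kept = [n for n in zf.namelist()
                if n.endswith(('.py', '.txt', '.md', '.json', '.yaml', '.yml'))]
    # kept == ['app.py', 'config.yml']; logo.png is skipped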
@@ -451,9 +570,9 @@ class CodeGenController:
         self.infinite_mode = infinite_mode
         self.is_python = is_python
         self.model_name = PYTHON_MODEL if is_python else OTHER_MODEL
-
+
         logging.info(f"Controller initialized for {'Python' if is_python else 'Other'} with model: {self.model_name}")
-
+
         self.history: List[Dict] = []
         self.current_files: Dict[str, str] = {}
         self.current_code: str = ""
@@ -464,16 +583,16 @@ class CodeGenController:
         self.best_zip: Optional[str] = None
         self.best_review: str = ""
         self.best_readme: str = ""
-
+
         self.stop_flag = Path(tempfile.gettempdir()) / f"stop_{uuid.uuid4().hex[:8]}"
-
+
     def cleanup_workdir(self, workdir: Path):
         try:
             if workdir.exists():
                 shutil.rmtree(workdir)
         except Exception as e:
             write_error_log(e, f"Failed to cleanup workdir {workdir}")
-
+
     def start_scaffolding(self) -> bool:
         scaffold = create_initial_scaffold(self.client, self.token, self.goal, self.is_python)
         if scaffold and scaffold.get("files"):
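Note: the stop mechanism is a plain marker file in the temp directory; the UI's stop handler creates it and run_loop polls stop_flag.exists() each iteration. The same pattern in isolation, with a hypothetical path:

    from pathlib import Path
    import tempfile, uuid

    stop_flag = Path(tempfile.gettempdir()) / f"stop_{uuid.uuid4().hex[:8]}"
    stop_flag.touch()        # what the Stop button does
    if stop_flag.exists():   # what the loop checks each iteration
        stop_flag.unlink()   # consume the flag and break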
@@ -492,12 +611,12 @@ class CodeGenController:
         self.current_code = "\n\n".join(f"# {n}\n{c}" for n, c in self.current_files.items())
         self.best_files = dict(self.current_files)
         return False
-
+
     def perform_iteration(self, iteration: int) -> Dict[str, Any]:
         parent = Path(tempfile.mkdtemp(prefix="infgen_"))
         workdir = parent / f"iter_{iteration}_{uuid.uuid4().hex[:6]}"
         workdir.mkdir(parents=True, exist_ok=True)
-
+
         try:
             system = """You are a Level 5 Principal Software Engineer specializing in production-ready code.
 Follow Defensive Programming, TDD, and best practices.
@@ -528,15 +647,29 @@ CRITICAL RULES:

 Return the perfected code in META format."""

+            # Attempt the model call, with extra retry attempts and reduced token fallback
             response = call_model(self.client, system, prompt, self.is_python, **self.settings)

-            if "<<ERROR" in response:
-                logging.error(f"Model returned error: {response[:200]}")
-
+            if not response or "<<ERROR" in response:
+                logging.error(f"Model returned error or empty: {response[:200]}")
+                # Save response for debugging if available
+                with open(f"/tmp/failed_response_{iteration}.txt", "w") as f:
+                    f.write(response or "<<EMPTY RESPONSE>>")
+                # Try one conservative retry with reduced token budget before failing
+                logging.info("Attempting a conservative retry with reduced tokens...")
+                conservative_settings = dict(self.settings)
+                conservative_settings["max_new_tokens"] = min(1024, int(conservative_settings.get("max_new_tokens", 1024)))
+                conservative_settings["temperature"] = min(0.3, float(conservative_settings.get("temperature", 0.3)))
+                response_retry = call_model(self.client, system, prompt, self.is_python, **conservative_settings)
+                if response_retry and "<<ERROR" not in response_retry:
+                    response = response_retry
+                else:
+                    # write debug file already done
+                    return {"success": False, "warning": "Model error - keeping previous code"}

             meta = parse_meta(response)
             if not meta or not meta.get("files"):
-                logging.error(f"Parse failed. Response preview: {response[:
+                logging.error(f"Parse failed. Response preview: {response[:1000]}")
                 # Save failed response for debugging
                 with open(f"/tmp/failed_response_{iteration}.txt", "w") as f:
                     f.write(response)
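Note: the conservative retry clamps both knobs downward with min(), so a user setting already below the cap is kept as-is. A quick illustration with hypothetical settings:

    settings = {"max_new_tokens": 4096, "temperature": 0.7}
    retry = dict(settings)
    retry["max_new_tokens"] = min(1024, int(retry.get("max_new_tokens", 1024)))  # -> 1024
    retry["temperature"] = min(0.3, float(retry.get("temperature", 0.3)))        # -> 0.3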
@@ -545,12 +678,12 @@ Return the perfected code in META format."""
             files = meta["files"]
             write_files(workdir, files)
             eval_results = run_evaluators(workdir)
-
+
             review = generate_code_review(self.client, self.token, files, eval_results, self.is_python)
             readme = generate_readme(self.client, self.token, self.goal, files, self.is_python)
             files["README.md"] = readme
             write_files(workdir, {"README.md": readme})
-
+
             zip_path = make_zip(workdir)

             return {
@@ -561,17 +694,17 @@ Return the perfected code in META format."""
         except Exception as e:
             write_error_log(e, "Iteration exception")
             return {"success": False, "warning": f"Exception: {str(e)}"}
-
+
     def run_loop(self) -> Generator:
         iteration = 1
         max_iterations = 999999 if self.infinite_mode else self.max_iters
-
+
         if not self.current_files:
             self.start_scaffolding()
-
+
         initial_state = {"stop_flag_path": str(self.stop_flag)}
         yield self.format_output(f"Starting with {self.model_name}...", iteration, max_iterations, initial_state)
-
+
         while iteration <= max_iterations:
             if self.stop_flag.exists():
                 try:
@@ -581,11 +714,11 @@ Return the perfected code in META format."""
                     pass
                 yield self.format_output("⛔ Stopped by user", iteration, max_iterations)
                 break
-
+
             yield self.format_output(f"🔄 Iteration {iteration}/{max_iterations} running...", iteration, max_iterations)
-
+
             result = self.perform_iteration(iteration)
-
+
             if not result.get("success"):
                 warning_msg = result.get("warning", "Unknown iteration error")
                 logging.warning(f"Iteration {iteration} failed: {warning_msg}")
@@ -597,11 +730,11 @@ Return the perfected code in META format."""

             eval_res = result.get("eval", {})
             score = eval_res.get("quality_score", 0)
-
+
             self.history.append({"iteration": iteration, "eval": eval_res})
             self.current_files = result["files"]
             self.current_code = "\n\n".join(f"# {n}\n{c}" for n, c in self.current_files.items())
-
+
             if score > self.best_score:
                 if self.best_workspace:
                     self.cleanup_workdir(Path(self.best_workspace))
@@ -616,29 +749,29 @@ Return the perfected code in META format."""
             else:
                 # Even if score didn't improve, still update current_files for next iteration
                 logging.info(f"Score {score}/100 - keeping best: {self.best_score}/100")
-
+
             if result.get("workdir") and result.get("workdir") != self.best_workspace:
                 self.cleanup_workdir(Path(result["workdir"]))
-
+
             yield self.format_output(f"Iteration {iteration} complete: {score}/100", iteration, max_iterations)
-
+
             iteration += 1
             time.sleep(0.3)
-
+
         yield self.format_output(f"Complete! Best: {self.best_score}/100", iteration - 1, max_iterations)
-
+
     def format_output(self, log_msg: str, iteration: int, max_iters: int, state: Optional[Dict] = None):
         progress = f"Iteration {iteration}/{max_iters if max_iters < 999999 else 'INF'}" if iteration <= max_iters else "Complete"
-
+
         main = self.best_files.get("main.py", "# Generating code...")
         test = next((v for k, v in self.best_files.items() if 'test' in k and k.endswith('.py')), "# No tests yet...")
         req = self.best_files.get("requirements.txt", "# No requirements yet...")
         readme = self.best_files.get("README.md", "# Generating README...")
         other = {k: v for k, v in self.best_files.items() if k not in [
-            "main.py", next((k for k in self.best_files if 'test' in k and k.endswith('.py')), None),
+            "main.py", next((k for k in self.best_files if 'test' in k and k.endswith('.py')), None),
             "requirements.txt", "README.md"
         ]}
-
+
         return (
             f"[{time.strftime('%X')}] {sanitize_log_message(log_msg)}", self.model_name, progress,
             generate_metrics_html(self.history), self.best_eval, main, test, req, readme, other,
@@ -649,26 +782,26 @@ Return the perfected code in META format."""
 def generate_metrics_html(history: List[Dict]) -> str:
     if not history:
         return "<div style='padding:12px'>No metrics yet</div>"
-
+
     html_parts = ["<div style='font-family:sans-serif'><h4>Quality Trend</h4><div style='background:#f8f9fa;padding:12px;border-radius:8px'>"]
     for h in history[-10:]:
         score = h.get("eval", {}).get("quality_score", 0)
         width = int(score * 2.5)
         color = "#10b981" if score >= 80 else "#f59e0b" if score >= 60 else "#ef4444"
         html_parts.append(f"<div style='margin:4px 0'>#{h.get('iteration')}: <div style='display:inline-block;width:{width}px;height:20px;background:{color};border-radius:4px'></div> {score}/100</div>")
-
+
     scores = [h.get("eval", {}).get("quality_score", 0) for h in history]
     avg = sum(scores) / len(scores) if scores else 0
     best = max(scores) if scores else 0
     html_parts.append(f"<div style='margin-top:12px'><strong>Avg:</strong> {avg:.1f} | <strong>Best:</strong> {best:.1f}</div></div></div>")
-
+
     return "".join(html_parts)

 # ---------- UI ----------
 def create_ui():
     with gr.Blocks(title="InfinateCodeGenerator Ultimate", theme=gr.themes.Soft()) as demo:
         gr.Markdown("# InfinateCodeGenerator - Ultimate Merged Edition\n*Controller architecture • Smart models • Multi-file UI • Never stops early*")
-
+
         controller_state = gr.State({})

         with gr.Row():
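Note: the trend chart maps each 0-100 score to a 0-250 px bar (int(score * 2.5)) and colors it green at >= 80, amber at >= 60, red otherwise:

    for score in (95, 72, 40):
        width = int(score * 2.5)
        color = "#10b981" if score >= 80 else "#f59e0b" if score >= 60 else "#ef4444"
        # -> (95, 237, green), (72, 180, amber), (40, 100, red)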
@@ -736,20 +869,20 @@ def create_ui():
                 logging.error(error_msg)
                 yield (error_msg, "", "", "", {}, "", "", "", "", {}, "", None, "", {})
                 return
-
+
             logging.info(f"Starting generation with token: {token[:10]}... (length: {len(token)})")
-
+
             settings = {"temperature": temp, "top_p": top, "max_new_tokens": max_tok}
             is_python_project = detect_language(goal, init_code)
             logging.info(f"Detected project type: {'Python' if is_python_project else 'Other'}")
-
+
             controller = CodeGenController(token, goal, instructions, settings, int(max_it), inf_mode, is_python_project)
-
+
             if init_code and init_code.strip():
                 controller.current_files = {"main.py": init_code}
                 controller.current_code = init_code
                 logging.info("Using provided initial code")
-
+
             yield from controller.run_loop()

         def set_stop(controller_state_val):
@@ -769,7 +902,7 @@ def create_ui():
             main_file, test_file, req_file, readme_file, other_files,
             review_display, download_zip, workspace_path, controller_state
         ]
-
+
         start_btn.click(
             fn=start_gen,
             inputs=[project_goal, initial_code, improve_instructions, hf_token_manual, infinite_mode, max_iters, temperature, top_p, max_tokens],
@@ -786,4 +919,4 @@ if __name__ == "__main__":
         demo.queue().launch(server_name="0.0.0.0", server_port=7860)
     except Exception as e:
         print(f"Failed to launch Gradio app: {e}", file=sys.stderr)
-        sys.exit(1)
+        sys.exit(1)