| """Code extraction from model responses. |
| |
| Extracts fenced code blocks and multi-file @@FILE: blocks. |
| Normalizes language names and detects Gradio code. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import html |
| import re |
|
|
| from code.config.constants import ( |
| CODE_BLOCK_RE, |
| FILE_BLOCK_RE, |
| THINKING_BLOCK_RE, |
| ) |
|
|
|
|
def strip_thinking_blocks(text: str) -> str:
    """Return *text* with every ``THINKING_BLOCK_RE`` match deleted.

    Whitespace left at either end after the removal is trimmed.
    """
    without_thinking = THINKING_BLOCK_RE.sub("", text)
    return without_thinking.strip()
|
|
|
|
def extract_code(response: str) -> tuple[str, str | None]:
    """Return the first fenced code block and its language tag.

    Thinking blocks are stripped before searching, so a fence that only
    appears inside one is never returned.

    Args:
        response: Raw model output.

    Returns:
        A ``(code, language)`` pair. ``code`` is the whitespace-trimmed
        content of the first ``CODE_BLOCK_RE`` match and ``language`` is its
        trimmed, lower-cased tag, or ``None`` when the tag is empty.
        ``("", None)`` is returned when nothing matches.
    """
    visible_response = strip_thinking_blocks(response)
    match = CODE_BLOCK_RE.search(visible_response)
    if match is None:
        return "", None

    # A capture group that did not participate in the match yields None
    # (e.g. an optional language tag); treat that the same as an empty string
    # instead of failing on ``None.strip()``.
    raw_lang = match.group(1) or ""
    raw_code = match.group(2) or ""
    return raw_code.strip(), (raw_lang.strip().lower() or None)
|
|
|
|
def extract_multi_file(response: str) -> dict[str, str]:
    """Collect the files of a multi-file ``@@FILE:`` response.

    Every ``FILE_BLOCK_RE`` match in the response (thinking blocks removed)
    contributes one entry; when the same path occurs more than once the last
    occurrence wins. If no file block is present, the first fenced code block
    is used instead and stored under a default file name chosen from its
    language tag (``code.txt`` for an unknown or missing tag).

    Returns a mapping of ``{filepath: content}``, empty when the response
    contains neither file blocks nor a fenced code block.
    """
    text = strip_thinking_blocks(response)
    collected: dict[str, str] = {
        block.group(1).strip(): block.group(2).strip()
        for block in FILE_BLOCK_RE.finditer(text)
    }
    if collected:
        return collected

    # Fallback: treat a lone fenced block as a single-file project.
    snippet, tag = extract_code(response)
    if not snippet:
        return collected

    default_names = {
        "python": "main.py",
        "py": "main.py",
        "javascript": "index.js",
        "js": "index.js",
        "typescript": "index.ts",
        "ts": "index.ts",
        "html": "index.html",
        "css": "styles.css",
        "java": "Main.java",
        "go": "main.go",
        "rust": "main.rs",
        "php": "index.php",
        "ruby": "main.rb",
        "csharp": "Program.cs",
        "swift": "main.swift",
        "kotlin": "Main.kt",
    }
    target_name = default_names.get(tag or "", "code.txt")
    collected[target_name] = snippet
    return collected
|
|
|
|
# Aliases that differ from their canonical name. Any name not listed here
# (including the canonical names themselves) normalizes to itself.
_LANGUAGE_ALIASES = {
    "py": "python",
    "html": "web",
    "css": "web",
    "js": "javascript",
    "ts": "typescript",
    "c#": "csharp",
}


def normalize_language(target_language: str | None, fence_lang: str | None) -> str:
    """Normalize a language name to its canonical form.

    The fence tag takes precedence over the requested target language; when
    neither carries a usable name the result is ``"python"``.

    Args:
        target_language: Language requested for the generation, if any.
        fence_lang: Language tag found on the code fence, if any.

    Returns:
        The lower-cased canonical name. Known aliases are folded
        (``py`` -> ``python``, ``html``/``css`` -> ``web``, ``js`` ->
        ``javascript``, ``ts`` -> ``typescript``, ``c#`` -> ``csharp``);
        unrecognized names are returned lower-cased and trimmed.
    """
    for candidate in (fence_lang, target_language):
        # Trim before testing so that a padded name still matches an alias
        # and a whitespace-only value does not shadow the next candidate.
        lang = (candidate or "").strip().lower()
        if lang:
            return _LANGUAGE_ALIASES.get(lang, lang)
    return "python"
|
|
|
|
# Compiled once at import time. Word boundaries keep look-alike names such as
# ``gradio_client`` or ``mygr.Blocks`` from matching, and the optional dotted
# suffix covers ``from gradio.<submodule> import ...``.
_GRADIO_RE = re.compile(
    r"\bimport\s+gradio\b"
    r"|\bfrom\s+gradio(?:\.\w+)*\s+import\b"
    r"|\bgr\.\s*(?:Interface|Blocks|TabbedInterface|ChatInterface|App)\b"
)


def is_gradio_code(code: str) -> bool:
    """Detect if Python code is a Gradio app.

    The check is purely textual: it returns True when *code* imports the
    ``gradio`` package (``import gradio``, ``from gradio import ...`` or
    ``from gradio.<submodule> import ...``) or references one of the app
    constructors through the ``gr`` alias (``gr.Interface``, ``gr.Blocks``,
    ``gr.TabbedInterface``, ``gr.ChatInterface``, ``gr.App``).
    """
    return _GRADIO_RE.search(code) is not None
|
|
|
|
| |
|
|
| def _web_document(code: str, fence_lang: str | None) -> str: |
| """Wrap code in an HTML document if needed.""" |
| lang = (fence_lang or "").lower() |
| if lang in {"javascript", "js"}: |
| return f"<!doctype html><html><body><script>\n{code}\n</script></body></html>" |
| if lang == "css": |
| return f"<!doctype html><html><head><style>\n{code}\n</style></head><body></body></html>" |
| if re.search(r"<!doctype|<html[\s>]", code, flags=re.IGNORECASE): |
| return code |
| return f"<!doctype html><html><head><meta charset='utf-8'></head><body>\n{code}\n</body></html>" |
|
|
|
|
def build_iframe(code: str, fence_lang: str | None = None) -> str:
    """Return an ``<iframe>`` element that previews *code* as a web page.

    The code is first turned into a full HTML document, then HTML-escaped
    (quotes included) so it can be embedded in the ``srcdoc`` attribute. The
    frame is sandboxed with only ``allow-scripts`` enabled.
    """
    escaped_document = html.escape(_web_document(code, fence_lang), quote=True)
    attributes = [
        'class="web-frame"',
        'sandbox="allow-scripts"',
        'allow="fullscreen"',
        "allowfullscreen",
        f'srcdoc="{escaped_document}"',
        'style="width:100%; min-height:680px; border:0; border-radius:14px; background:white;"',
    ]
    return "<iframe " + " ".join(attributes) + "></iframe>"
|
|