File size: 4,445 Bytes
ccb935d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
"""Code extraction from model responses.

Extracts fenced code blocks and multi-file @@FILE: blocks.
Normalizes language names and detects Gradio code.
"""

from __future__ import annotations

import html
import re

from code.config.constants import (
    CODE_BLOCK_RE,
    FILE_BLOCK_RE,
    THINKING_BLOCK_RE,
)


def strip_thinking_blocks(text: str) -> str:
    """Return *text* with every THINKING_BLOCK_RE match deleted.

    Leading and trailing whitespace left behind after the removal is
    trimmed from the result.
    """
    without_thinking = THINKING_BLOCK_RE.sub("", text)
    return without_thinking.strip()


def extract_code(response: str) -> tuple[str, str | None]:
    """Pull the first code block matched by CODE_BLOCK_RE out of *response*.

    Thinking blocks are stripped before searching.

    Returns:
        A ``(code, language)`` pair. ``code`` is the whitespace-trimmed
        second capture group; ``language`` is the trimmed, lower-cased first
        capture group, or ``None`` when that group is empty. When nothing
        matches, the pair is ``("", None)``.
    """
    found = CODE_BLOCK_RE.search(strip_thinking_blocks(response))
    if found is None:
        return "", None

    body = found.group(2).strip()
    fence_tag = found.group(1).strip().lower()
    # An empty tag is reported as None rather than "".
    return body, fence_tag or None


def extract_multi_file(response: str) -> dict[str, str]:
    """Collect a multi-file project from the @@FILE: blocks in *response*.

    Every FILE_BLOCK_RE match (searched after thinking blocks are removed)
    contributes one entry: the trimmed first group is the file path and the
    trimmed second group is the file content. A path that appears more than
    once keeps the content of its last occurrence.

    If no such block is found, the first fenced code block (as returned by
    ``extract_code``) is stored under a default file name chosen from its
    language tag, or ``code.txt`` when the tag is missing or unrecognized.

    Returns:
        Mapping of ``{filepath: content}``; empty when nothing was extracted.
    """
    cleaned = strip_thinking_blocks(response)
    project = {
        block.group(1).strip(): block.group(2).strip()
        for block in FILE_BLOCK_RE.finditer(cleaned)
    }
    if project:
        return project

    # Fallback path: no @@FILE: blocks, so try a single fenced code block.
    snippet, fence = extract_code(response)
    if not snippet:
        return project

    default_names = {
        "python": "main.py", "py": "main.py",
        "javascript": "index.js", "js": "index.js",
        "typescript": "index.ts", "ts": "index.ts",
        "html": "index.html",
        "css": "styles.css",
        "java": "Main.java",
        "go": "main.go",
        "rust": "main.rs",
        "php": "index.php",
        "ruby": "main.rb",
        "csharp": "Program.cs",
        "swift": "main.swift",
        "kotlin": "Main.kt",
    }
    return {default_names.get(fence or "", "code.txt"): snippet}


def normalize_language(target_language: str | None, fence_lang: str | None) -> str:
    """Map a language label onto its canonical name.

    The label is taken from *fence_lang* when it is non-empty, otherwise from
    *target_language*, otherwise ``"python"``. It is lower-cased before lookup.

    Returns:
        The canonical name for known aliases (``py`` -> ``python``,
        ``html``/``css`` -> ``web``, ``js`` -> ``javascript``,
        ``ts`` -> ``typescript``, ``c#`` -> ``csharp``); any other label is
        returned lower-cased and otherwise unchanged.
    """
    # Only spellings that differ from their canonical form need an entry;
    # names that are already canonical fall through the lookup untouched.
    aliases = {
        "py": "python",
        "html": "web",
        "css": "web",
        "js": "javascript",
        "ts": "typescript",
        "c#": "csharp",
    }
    label = fence_lang or target_language or "python"
    lowered = label.lower()
    return aliases.get(lowered, lowered)


def is_gradio_code(code: str) -> bool:
    """Report whether *code* looks like a Gradio application.

    The check is a plain text search: it succeeds when the source contains an
    ``import gradio`` or ``from gradio import`` statement, or a ``gr.`` access
    to Interface, Blocks, TabbedInterface, ChatInterface or App.
    """
    gradio_signature = (
        r"import\s+gradio|from\s+gradio\s+import|gr\.\s*(Interface|Blocks|TabbedInterface|ChatInterface|App)"
    )
    return re.search(gradio_signature, code) is not None


# ─── Web Document / Iframe Builder ─────────────────────────────────────

def _web_document(code: str, fence_lang: str | None) -> str:
    """Produce a complete HTML document for *code*.

    Args:
        code: Source text to embed.
        fence_lang: Language tag of the code fence, compared
            case-insensitively; ``None`` or empty means "no tag".

    Returns:
        JavaScript wrapped in a ``<script>`` element, CSS wrapped in a
        ``<style>`` element, or, for any other tag, *code* unchanged when it
        already contains a doctype or ``<html>`` tag, else *code* placed
        inside a minimal HTML skeleton.
    """
    fence = fence_lang.lower() if fence_lang else ""

    if fence == "css":
        return f"<!doctype html><html><head><style>\n{code}\n</style></head><body></body></html>"
    if fence in ("javascript", "js"):
        return f"<!doctype html><html><body><script>\n{code}\n</script></body></html>"

    # Markup that already declares itself a full document is passed through.
    is_full_document = re.search(r"<!doctype|<html[\s>]", code, flags=re.IGNORECASE) is not None
    if not is_full_document:
        return f"<!doctype html><html><head><meta charset='utf-8'></head><body>\n{code}\n</body></html>"
    return code


def build_iframe(code: str, fence_lang: str | None = None) -> str:
    """Render *code* as an ``<iframe>`` element for web preview.

    The code is first turned into an HTML document by ``_web_document`` and
    then HTML-escaped (quotes included) so it can be embedded in the
    ``srcdoc`` attribute.

    Args:
        code: Source text to preview.
        fence_lang: Optional language tag forwarded to ``_web_document``.

    Returns:
        The iframe markup as a single string, with
        ``sandbox="allow-scripts"`` and fullscreen enabled.
    """
    escaped_document = html.escape(_web_document(code, fence_lang), quote=True)
    fragments = (
        '<iframe class="web-frame" ',
        'sandbox="allow-scripts" ',
        'allow="fullscreen" ',
        "allowfullscreen ",
        f'srcdoc="{escaped_document}" ',
        'style="width:100%; min-height:680px; border:0; border-radius:14px; ',
        'background:white;"></iframe>',
    )
    return "".join(fragments)