akhaliq HF Staff committed on
Commit
2e098e6
·
1 Parent(s): 3a2235f
Files changed (2) hide show
  1. backend_parsers.py +60 -5
  2. backend_prompts.py +28 -8
backend_parsers.py CHANGED
@@ -15,11 +15,66 @@ def parse_transformers_js_output(code: str) -> Dict[str, str]:
15
  Uses comprehensive parsing patterns to handle various LLM output formats.
16
  Updated to use transformers.js v3.8.0 CDN.
17
  """
18
- files = {
19
- 'index.html': '',
20
- 'index.js': '',
21
- 'style.css': ''
22
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  # Multiple patterns to match the three code blocks with different variations
25
  html_patterns = [
 
15
  Uses comprehensive parsing patterns to handle various LLM output formats.
16
  Updated to use transformers.js v3.8.0 CDN.
17
  """
18
+ # Check if code starts with HTML instead of markers (common LLM mistake)
19
+ if code.strip().startswith('<!DOCTYPE') or code.strip().startswith('<html'):
20
+ print("[Parser] WARNING: Code starts with HTML instead of === index.html === marker")
21
+ print("[Parser] Attempting to extract files from malformed output...")
22
+
23
+ # Try to split by === markers that do exist
24
+ if '=== index.js ===' in code and '=== style.css ===' in code:
25
+ # Extract HTML as everything before === index.js ===
26
+ html_end = code.find('=== index.js ===')
27
+ html_content = code[:html_end].strip()
28
+
29
+ # Extract JS between === index.js === and === style.css ===
30
+ js_start = code.find('=== index.js ===') + len('=== index.js ===')
31
+ js_end = code.find('=== style.css ===')
32
+ js_content = code[js_start:js_end].strip()
33
+
34
+ # Extract CSS after === style.css ===
35
+ css_start = code.find('=== style.css ===') + len('=== style.css ===')
36
+ css_content = code[css_start:].strip()
37
+
38
+ print(f"[Parser] Recovered HTML: {len(html_content)} chars")
39
+ print(f"[Parser] Recovered JS: {len(js_content)} chars")
40
+ print(f"[Parser] Recovered CSS: {len(css_content)} chars")
41
+
42
+ files = {
43
+ 'index.html': html_content,
44
+ 'index.js': js_content,
45
+ 'style.css': css_content
46
+ }
47
+
48
+ # Normalize imports and return early since we've already parsed everything
49
+ cdn_url = "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.8.0"
50
+ for file_key in ['index.html', 'index.js']:
51
+ if files[file_key]:
52
+ content = files[file_key]
53
+ content = re.sub(
54
+ r"from\s+['\"]https://cdn.jsdelivr.net/npm/@huggingface/transformers@[^'\"]+['\"]",
55
+ f"from '{cdn_url}'",
56
+ content
57
+ )
58
+ content = re.sub(
59
+ r"from\s+['\"]https://cdn.jsdelivr.net/npm/@xenova/transformers@[^'\"]+['\"]",
60
+ f"from '{cdn_url}'",
61
+ content
62
+ )
63
+ files[file_key] = content
64
+
65
+ return files
66
+ else:
67
+ files = {
68
+ 'index.html': '',
69
+ 'index.js': '',
70
+ 'style.css': ''
71
+ }
72
+ else:
73
+ files = {
74
+ 'index.html': '',
75
+ 'index.js': '',
76
+ 'style.css': ''
77
+ }
78
 
79
  # Multiple patterns to match the three code blocks with different variations
80
  html_patterns = [
backend_prompts.py CHANGED
@@ -35,7 +35,11 @@ TRANSFORMERS_JS_SYSTEM_PROMPT = """You are an expert web developer creating a tr
35
  - Generating a README.md will break the deployment process
36
 
37
  **🚨 CRITICAL: Required Output Format**
38
- You MUST output ALL THREE files using this EXACT format with === markers:
 
 
 
 
39
 
40
  === index.html ===
41
  <!DOCTYPE html>
@@ -62,13 +66,29 @@ import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers
62
  /* Your complete CSS styles here */
63
  /* Include all styling for the application */
64
 
65
- **CRITICAL FORMATTING RULES:**
66
- 1. Start each file IMMEDIATELY after the === marker (on the next line)
67
- 2. DO NOT use markdown code blocks (```html, ```javascript, ```css) - these will cause parsing errors
68
- 3. DO NOT leave any file empty - each file MUST contain complete, functional code
69
- 4. ONLY use the === filename === format shown above
70
- 5. Make sure there is a blank line between each file section
71
- 6. Each file must be complete and ready to deploy - no placeholders or comments like "// add code here"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  **Example of CORRECT format:**
74
  === index.html ===
 
35
  - Generating a README.md will break the deployment process
36
 
37
  **🚨 CRITICAL: Required Output Format**
38
+
39
+ **THE VERY FIRST LINE of your response MUST be: === index.html ===**
40
+
41
+ You MUST output ALL THREE files using this EXACT format with === markers.
42
+ Your response must start IMMEDIATELY with the === index.html === marker.
43
 
44
  === index.html ===
45
  <!DOCTYPE html>
 
66
  /* Your complete CSS styles here */
67
  /* Include all styling for the application */
68
 
69
+ **🚨 CRITICAL FORMATTING RULES (MUST FOLLOW EXACTLY):**
70
+ 1. **FIRST LINE MUST BE: === index.html ===** (no explanations, no code before this)
71
+ 2. Start each file's code IMMEDIATELY on the line after the === marker
72
+ 3. **NEVER use markdown code blocks** (```html, ```javascript, ```css) - these will cause parsing errors
73
+ 4. **NEVER leave any file empty** - each file MUST contain complete, functional code
74
+ 5. **ONLY use the === filename === markers** - do not add any other formatting
75
+ 6. Add a blank line between each file section
76
+ 7. Each file must be complete and ready to deploy - no placeholders or "// TODO" comments
77
+
78
+ **WRONG FORMAT (DO NOT DO THIS):**
79
+ <!DOCTYPE html>
80
+ <html>...
81
+
82
+ === index.js ===
83
+ ...
84
+
85
+ **CORRECT FORMAT (DO THIS):**
86
+ === index.html ===
87
+ <!DOCTYPE html>
88
+ <html>...
89
+
90
+ === index.js ===
91
+ ...
92
 
93
  **Example of CORRECT format:**
94
  === index.html ===